
Commit 2998ac2

committed
-- refactored data_aware_initialization
-- added docs
1 parent fab881a commit 2998ac2

File tree

4 files changed: 27 additions & 33 deletions


docs/models.md

Lines changed: 2 additions & 0 deletions
@@ -128,6 +128,8 @@ There are two methods that need to be defined in any class that inherits the Bas
 
 While this is the bare minimum, you can redefine or use any of the PyTorch Lightning standard methods to tweak your model and training to your liking.
 
+If your model needs to use custom data-aware initialization techniques (like NODE), you can override `data_aware_initialization(self, datamodule)` in the model. There you have access to the datamodule and the dataloaders for initialization.
+
 In addition to the model, you will also need to define a config. Configs are Python dataclasses, should inherit `ModelConfig`, and will have all the parameters of `ModelConfig` by default. Any additional parameter should be defined in the dataclass.
 
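For example, a custom model could use this hook to set its output bias from the training targets. A minimal sketch, assuming a hypothetical `MyRegressionModel` (its `_build_network` and `forward` are elided) and assuming batches carry a `target` key, as in the NODE code further down:

import torch

from pytorch_tabular.models.base_model import BaseModel


class MyRegressionModel(BaseModel):
    # _build_network, forward, etc. omitted for brevity

    def data_aware_initialization(self, datamodule):
        # Pull one large batch from the datamodule, mirroring what NODE does.
        batch = next(iter(datamodule.train_dataloader(batch_size=2000)))
        with torch.no_grad():
            # Hypothetical `head` layer: start its bias at the mean target so
            # the model begins from a sensible baseline prediction.
            self.head.bias.fill_(batch["target"].float().mean().item())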

pytorch_tabular/models/base_model.py

Lines changed: 3 additions & 0 deletions
@@ -158,6 +158,9 @@ def calculate_metrics(self, y, y_hat, tag):
             prog_bar=True,
         )
         return metrics
+
+    def data_aware_initialization(self, datamodule):
+        pass
 
     @abstractmethod
     def forward(self, x: Dict):

pytorch_tabular/models/node/node_model.py

Lines changed: 17 additions & 0 deletions
@@ -64,6 +64,23 @@ def __init__(self, config: DictConfig, **kwargs):
     def subset(self, x):
         return x[..., : self.hparams.output_dim].mean(dim=-2)
 
+    def data_aware_initialization(self, datamodule):
+        """Performs data-aware initialization for NODE"""
+        logger.info("Data Aware Initialization....")
+        # Need a big batch to initialize properly
+        alt_loader = datamodule.train_dataloader(batch_size=2000)
+        batch = next(iter(alt_loader))
+        for k, v in batch.items():
+            if isinstance(v, list) and (len(v) == 0):
+                # Skipping empty list
+                continue
+            # batch[k] = v.to("cpu" if self.config.gpu == 0 else "cuda")
+            batch[k] = v.to(self.device)
+
+        # single forward pass to initialize the ODST
+        with torch.no_grad():
+            self(batch)
+
     def _build_network(self):
         if self.hparams.embed_categorical:
             self.embedding_layers = nn.ModuleList(
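The big batch matters here because the ODST layers inside NODE derive their initial split thresholds from the statistics of the first batch they see, and a small batch would give noisy estimates. A schematic sketch of that idea, not the actual ODST code:

import torch


def init_split_thresholds(x: torch.Tensor, n_splits: int) -> torch.Tensor:
    """Schematic only: choose split thresholds as random empirical quantiles
    of each feature, computed over the (large) initialization batch x."""
    q = torch.rand(n_splits)            # random quantile levels in [0, 1)
    return torch.quantile(x, q, dim=0)  # shape: (n_splits, n_features)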

pytorch_tabular/tabular_model.py

Lines changed: 5 additions & 33 deletions
@@ -79,16 +79,6 @@ def __init__(
         ), "If `config` is None, `data_config`, `model_config`, `trainer_config`, and `optimizer_config` cannot be None"
         data_config = self._read_parse_config(data_config, DataConfig)
         model_config = self._read_parse_config(model_config, ModelConfig)
-        # # Re-routing to Categorical embedding Model if embed_categorical is true for NODE
-        # if (
-        #     hasattr(model_config, "_model_name")
-        #     and (model_config._model_name == "NODEModel")
-        #     and (model_config.embed_categorical)
-        #     and ("CategoryEmbedding" not in model_config._model_name)
-        # ):
-        #     model_config._model_name = (
-        #         "CategoryEmbedding" + model_config._model_name
-        #     )
         trainer_config = self._read_parse_config(trainer_config, TrainerConfig)
         optimizer_config = self._read_parse_config(
             optimizer_config, OptimizerConfig

@@ -255,23 +245,6 @@ def _prepare_callbacks(self) -> List:
         logger.debug(f"Callbacks used: {callbacks}")
         return callbacks
 
-    def data_aware_initialization(self):
-        """Performs data-aware initialization for NODE"""
-        logger.info("Data Aware Initialization....")
-        # Need a big batch to initialize properly
-        alt_loader = self.datamodule.train_dataloader(batch_size=2000)
-        batch = next(iter(alt_loader))
-        for k, v in batch.items():
-            if isinstance(v, list) and (len(v) == 0):
-                # Skipping empty list
-                continue
-            # batch[k] = v.to("cpu" if self.config.gpu == 0 else "cuda")
-            batch[k] = v.to(self.model.device)
-
-        # single forward pass to initialize the ODST
-        with torch.no_grad():
-            self.model(batch)
-
     def _prepare_dataloader(
         self, train, validation, test, target_transform=None, train_sampler=None
     ):

@@ -312,9 +285,9 @@ def _prepare_model(self, loss, metrics, optimizer, optimizer_params, reset):
             custom_optimizer=optimizer,
             custom_optimizer_params=optimizer_params,
         )
-        # Data Aware Initialization (NODE)
-        if self.config._model_name in ["NODEModel"]:
-            self.data_aware_initialization()
+        # Data Aware Initialization (for the models that need it)
+        self.model.data_aware_initialization(self.datamodule)
+
 
     def _prepare_trainer(self, max_epochs=None, min_epochs=None):
         logger.info("Preparing the Trainer...")

@@ -459,9 +432,8 @@ def fit(
         self.model.train()
         if self.config.auto_lr_find and (not self.config.fast_dev_run):
             self.trainer.tune(self.model, train_loader, val_loader)
-            # Parameters in NODE need to be initialized again
-            if self.config._model_name in ["CategoryEmbeddingNODEModel", "NODEModel"]:
-                self.data_aware_initialization()
+            # Parameters in models need to be initialized again after LR find
+            self.model.data_aware_initialization(self.datamodule)
         self.model.train()
         self.trainer.fit(self.model, train_loader, val_loader)
         logger.info("Training the model completed...")
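The net effect of the refactor, sketched below with illustrative names: dispatch moves from model-name string checks in `TabularModel` to plain inheritance, so a model that needs data-aware initialization overrides the hook, and every other model inherits the no-op from `BaseModel`.

class BaseModel:
    def data_aware_initialization(self, datamodule):
        pass  # default: most models need no data-aware initialization


class NODEModel(BaseModel):
    def data_aware_initialization(self, datamodule):
        ...  # big-batch forward pass, as in node_model.py above


# TabularModel can now invoke the hook unconditionally, instead of checking
# `self.config._model_name in ["NODEModel", ...]` before each call:
model.data_aware_initialization(datamodule)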
