From 0a0a4c6ab7e43c569d3a2be62a1bf0c35ca14dcf Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Tue, 15 Mar 2022 15:02:36 -0400
Subject: [PATCH 01/50] Change readme to trigger test

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index ad2294aa62..cc45a701b1 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![Downloads](https://pepy.tech/badge/flaml)](https://pepy.tech/project/flaml)
 [![Join the chat at https://gitter.im/FLAMLer/community](https://badges.gitter.im/FLAMLer/community.svg)](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
-# A Fast Library for Automated Machine Learning & Tuning
+# A Fast Library for Automated Machine Learning & Tuning.
 
 <p align="center">
     <img src="https://github.com/microsoft/FLAML/blob/main/website/static/img/flaml.svg"  width=200>

From 002683f3d6df2258316ad3b68afe6e8d564104eb Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Tue, 15 Mar 2022 15:31:41 -0400
Subject: [PATCH 02/50] add dependencies for AG

---
 .github/workflows/python-package.yml | 4 ++++
 setup.py                             | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index cebff3fbe1..3404d1824b 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -47,6 +47,10 @@ jobs:
         run: |
           pip install -e .[ray,forecast]
           pip install 'tensorboardX<=2.2'
+      - name: If python version > 3.6, install autogluon
+        if: matrix.python-version >= '3.7'
+        run: |
+          pip install -e .[autogluon]
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names
diff --git a/setup.py b/setup.py
index 907f1fe50f..f90b427fad 100644
--- a/setup.py
+++ b/setup.py
@@ -63,6 +63,11 @@
             "hcrystalball==0.1.10",
             "seqeval",
         ],
+        "autogluon": [
+            "mxnet<2.0.0",
+            "autogluon.text==0.4.0", 
+            "autogluon.features==0.4.0",
+            ],
         "catboost": ["catboost>=0.26"],
         "blendsearch": ["optuna==2.8.0"],
         "ray": [

From 60a847c87eed5edb7b5761bd9f3126b3e3ee9727 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Tue, 15 Mar 2022 16:29:30 -0400
Subject: [PATCH 03/50] add user permission to test_notebook_example L81

---
 flaml/model.py                       | 155 +++++++++++++++++++++++++++
 setup.py                             |   4 +-
 test/automl/test_notebook_example.py |   2 +-
 3 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/flaml/model.py b/flaml/model.py
index 9e6c20a1a3..d9f4700ce3 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -1965,6 +1965,161 @@ class XGBoostLimitDepth_TS(TS_SKLearn):
     base_class = XGBoostLimitDepthEstimator
 
 
+class AGTextPredictorEstimator(BaseEstimator):
+    """
+    The class for tuning AutoGluon TextPredictor
+    """
+    def __init__(self, task="binary", **params,):
+        from autogluon.text.text_prediction.mx_predictor import MXTextPredictor
+
+        super().__init__(task, **params)
+        self.estimator_class = MXTextPredictor
+
+    @classmethod
+    def search_space(cls, **params):
+        # Add the possible search space configs here, e.g. 'optimization.lr'
+        # reference: 
+        # https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
+        search_space_dict = {
+            'model.network.agg_net.mid_units': {
+                                                "domain": tune.choice(list(range(32, 129))),
+                                                "init_value": 128
+                                                },
+            'optimization.lr': {
+                                "domain": tune.loguniform(lower=1E-5, upper=1E-4),
+                                "init_value": 1E-4,
+                                },
+            'optimization.wd':{
+                                "domain": tune.choice([1E-4, 1E-3, 1E-2]),
+                                "init_value":1E-4
+                                },
+            'optimization.warmup_portion': {
+                                 "domain": tune.choice([0.1, 0.2]), 
+                                "init_value":0.1, 
+                                },
+        }
+        return search_space_dict
+
+    def _init_fix_args(self, automl_fit_kwargs: dict=None):
+        """
+        Save the customed fix args here
+        this includes:
+            "output_dir",
+            "text_backbone": "electra_base"
+            "multimodal_fusion_strategy":"fuse_late", 
+        """
+        fix_args = {}
+        FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size",
+                         "text_backbone", "multimodal_fusion_strategy", ]
+        for key, value in automl_fit_kwargs["custom_fix_args"].items():
+            assert (
+                key in FIX_ARGS_LIST
+            ), "The specified key {} is not in the argument list: output_dir, label_column, dataset_name, text_backbone,\
+                multimodal_fusion_strategy".format(key)
+            
+            fix_args[key] = value
+
+        self.fix_args = fix_args
+
+    def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str):
+
+        """"
+        Ref:
+        https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
+        """
+        from autogluon.text.text_prediction.legacy_presets import ag_text_presets
+
+        base_key = f'{text_backbone}_{multimodal_fusion_strategy}'
+        cfg = ag_text_presets.create(base_key)
+        # NOTE: if the search_space() is modified, add new items or delete here too.
+        TUNABLE_HP = set(['model.network.agg_net.mid_units',
+                      'optimization.batch_size',
+                      'optimization.layerwise_lr_decay',
+                      'optimization.lr',
+                      'optimization.nbest',
+                      'optimization.num_train_epochs',
+                      'optimization.per_device_batch_size',
+                      'optimization.wd',
+                      'optimization.warmup_portion',
+                     ])
+        search_space = cfg['models']['MultimodalTextModel']['search_space']
+        for key, value in self.params.items():
+            if key in TUNABLE_HP:
+                # NOTE: FLAML uses np.float64 but AG uses float, need to transform
+                if isinstance(value, np.float64):
+                    search_space[key] = value.item()
+                else:
+                    search_space[key] = value
+            search_space['optimization.per_device_batch_size'] = self.fix_args['per_device_batch_size']
+        return cfg
+   
+    def _set_seed(self, seed):
+        import random
+        import mxnet as mx
+        import torch as th
+        th.manual_seed(seed)
+        mx.random.seed(seed)
+        np.random.seed(seed)
+        random.seed(seed)
+
+    def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
+        self._kwargs = kwargs
+        self._init_fix_args(kwargs)
+        # the seed set in the bash script for ag experiment is 123
+        seed = self.params.get("seed", 123)
+        self._set_seed(seed)
+        
+        # get backbone and fusion strategy
+        text_backbone=self.fix_args["text_backbone"]
+        multimodal_fusion_strategy=self.fix_args["multimodal_fusion_strategy"]
+
+        # get & set the save dir, get the dataset info
+        save_dir = self.fix_args["output_dir"]
+        label_column = self.fix_args["label_column"]
+        dataset_name = self.fix_args["dataset_name"]
+        ag_model_save_dir = os.path.join(save_dir, f"{dataset_name}_ag_text_multimodal_{text_backbone}\
+                                                    _{multimodal_fusion_strategy}_no_ensemble")
+        
+        # set the of the hyperparameters
+        self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy)
+        PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression"}
+        TASK_METRIC_MAPPING = {"multi": "acc", "binary": "roc_auc", "regression": "r2"}
+       
+       # train the model
+        start_time = time.time()
+        
+        self._model = self.estimator_class(path=save_dir,
+                                           label=label_column,
+                                           problem_type=PROBLEM_TYPE_MAPPING[self._task],
+                                           eval_metric=TASK_METRIC_MAPPING[self._task])
+        
+        train_data = self._kwargs["train_data"]
+
+        self._model.fit(train_data=train_data,
+                        hyperparameters=self.hyperparameters,
+                        time_limit=budget, 
+                        seed=seed)
+ 
+        training_time = time.time() - start_time
+        return training_time
+    
+    def predict(self, X, as_pandas=False):
+        output = self._model.predict(self._kwargs["valid_data"], as_pandas=as_pandas)
+        return output
+
+
+    def predict_proba(self, X, as_pandas=False, as_multiclass=True):
+        # only works for classification tasks
+        assert (
+            self._task in CLASSIFICATION
+        ), "predict_proba() only for classification tasks."
+
+        output = self._model.predict_proba(self._kwargs["valid_data"], as_pandas=as_pandas)
+        if not as_multiclass:
+            if self._task == "binary":
+                output = output[:, 1]
+        return output
+
 class suppress_stdout_stderr(object):
     def __init__(self):
         # Open a pair of null files
diff --git a/setup.py b/setup.py
index f90b427fad..24a622b01f 100644
--- a/setup.py
+++ b/setup.py
@@ -65,9 +65,9 @@
         ],
         "autogluon": [
             "mxnet<2.0.0",
-            "autogluon.text==0.4.0", 
+            "autogluon.text==0.4.0",
             "autogluon.features==0.4.0",
-            ],
+        ],
         "catboost": ["catboost>=0.26"],
         "blendsearch": ["optuna==2.8.0"],
         "ray": [
diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py
index 1afb569eb2..adcfdf3298 100644
--- a/test/automl/test_notebook_example.py
+++ b/test/automl/test_notebook_example.py
@@ -78,7 +78,7 @@ def test_mlflow():
     import subprocess
     import sys
 
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow", "--user"])
     import mlflow
     from flaml.data import load_openml_task
 

From 60a9e2707c6d84b3ea9dd08b7d2d48eb288c4bb1 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Tue, 15 Mar 2022 20:09:26 -0400
Subject: [PATCH 04/50] add mlflow dependency to setup

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 24a622b01f..0f7b19f9c5 100644
--- a/setup.py
+++ b/setup.py
@@ -62,6 +62,7 @@
             "rouge_score",
             "hcrystalball==0.1.10",
             "seqeval",
+            "mlflow",
         ],
         "autogluon": [
             "mxnet<2.0.0",

From bc7f38db3bc1b922c16e22fe9af685ba43c5818b Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 09:31:14 -0400
Subject: [PATCH 05/50] add textpredictor estimator and test

---
 README.md                            |   2 +-
 flaml/ml.py                          |   3 +
 flaml/model.py                       |   9 +-
 test/automl/test_notebook_example.py |   2 +-
 test/test_agtextpredictor.py         | 132 +++++++++++++++++++++++++++
 5 files changed, 141 insertions(+), 7 deletions(-)
 create mode 100644 test/test_agtextpredictor.py

diff --git a/README.md b/README.md
index cc45a701b1..ad2294aa62 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![Downloads](https://pepy.tech/badge/flaml)](https://pepy.tech/project/flaml)
 [![Join the chat at https://gitter.im/FLAMLer/community](https://badges.gitter.im/FLAMLer/community.svg)](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
-# A Fast Library for Automated Machine Learning & Tuning.
+# A Fast Library for Automated Machine Learning & Tuning
 
 <p align="center">
     <img src="https://github.com/microsoft/FLAML/blob/main/website/static/img/flaml.svg"  width=200>
diff --git a/flaml/ml.py b/flaml/ml.py
index 146fe91acf..55256d3de2 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -37,6 +37,7 @@
     ARIMA,
     SARIMAX,
     TransformersEstimator,
+    AGTextPredictorEstimator,
 )
 from .data import CLASSIFICATION, group_counts, TS_FORECAST, TS_VALUE_COL
 import logging
@@ -121,6 +122,8 @@ def get_estimator_class(task, estimator_name):
         estimator_class = SARIMAX
     elif estimator_name == "transformer":
         estimator_class = TransformersEstimator
+    elif estimator_name == "agtextpredictor":
+        estimator_class = AGTextPredictorEstimator
     else:
         raise ValueError(
             estimator_name + " is not a built-in learner. "
diff --git a/flaml/model.py b/flaml/model.py
index d9f4700ce3..54c2abf472 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2103,18 +2103,17 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         training_time = time.time() - start_time
         return training_time
     
-    def predict(self, X, as_pandas=False):
-        output = self._model.predict(self._kwargs["valid_data"], as_pandas=as_pandas)
+    def predict(self, X):
+        output = self._model.predict(self._kwargs["valid_data"], as_pandas=False)
         return output
 
-
-    def predict_proba(self, X, as_pandas=False, as_multiclass=True):
+    def predict_proba(self, X, as_multiclass=True):
         # only works for classification tasks
         assert (
             self._task in CLASSIFICATION
         ), "predict_proba() only for classification tasks."
 
-        output = self._model.predict_proba(self._kwargs["valid_data"], as_pandas=as_pandas)
+        output = self._model.predict_proba(self._kwargs["valid_data"], as_pandas=False)
         if not as_multiclass:
             if self._task == "binary":
                 output = output[:, 1]
diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py
index adcfdf3298..1afb569eb2 100644
--- a/test/automl/test_notebook_example.py
+++ b/test/automl/test_notebook_example.py
@@ -78,7 +78,7 @@ def test_mlflow():
     import subprocess
     import sys
 
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow", "--user"])
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
     import mlflow
     from flaml.data import load_openml_task
 
diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
new file mode 100644
index 0000000000..0ddec3d1d3
--- /dev/null
+++ b/test/test_agtextpredictor.py
@@ -0,0 +1,132 @@
+from flaml import AutoML
+import pandas as pd
+import requests
+import sklearn
+import numpy as np
+import os
+import sys
+import json
+from sklearn.model_selection import train_test_split
+os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
+
+def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
+    """
+    Returns default holdout_frac used in fit().
+    Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
+    Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
+    """
+    if num_train_rows < 5000:
+        holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
+    else:
+        holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
+
+    if hyperparameter_tune:
+        holdout_frac = min(0.2, holdout_frac * 2)  # We want to allocate more validation data for HPO to avoid overfitting
+
+    return holdout_frac
+    
+def test_ag_text_predictor():
+    if sys.version < "3.7":
+        # do not test on python3.6
+        return
+
+    seed = 123
+    metric = "roc_auc"
+    problem_type = "binary"
+    train_data = {
+        "sentence1": [
+            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
+            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
+            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
+            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
+            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
+            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
+            "The DVD-CCA then appealed to the state Supreme Court .",        
+        ],
+        "sentence2": [
+            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
+            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
+            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
+            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
+            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
+            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
+            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
+        ],
+        "numerical1":[1, 2, 3, 4, 5, 6, 7, 8],
+        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
+        "label": [1, 0, 1, 0, 1, 1, 0, 1],
+        "idx": [0, 1, 2, 3, 4, 5, 6, 7],
+    }
+    train_dataset = pd.DataFrame(train_data)
+
+    test_data = {
+            "sentence1": [
+                "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
+                "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
+                "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
+                "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
+            ],
+            "sentence2": [
+                "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
+                "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
+                "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
+                "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
+            ],
+            "numerical1":[3, 4, 5, 6],
+            "categorical1": ["b", "a", "a", "b"],
+            "label": [0, 1, 1, 0],
+            "idx": [8, 10, 11, 12],
+        }
+    test_dataset = pd.DataFrame(test_data)
+
+    # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
+    holdout_frac = default_holdout_frac(len(train_dataset), False)
+
+    _, valid_dataset = train_test_split(train_dataset,
+                                    test_size=holdout_frac,
+                                    random_state=np.random.RandomState(seed))
+    
+    feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
+    
+    automl = AutoML()
+    automl_settings = {
+        "gpu_per_trial": 0,
+        "max_iter": 3,
+        "time_budget": 50,
+        "task": "binary",
+        "metric": "roc_auc", 
+    }
+
+    automl_settings["custom_fix_args"] = {   
+        "output_dir": "test/data/output/",
+        "text_backbone": "electra_base",
+        "multimodal_fusion_strategy": "fuse_late", 
+        "dataset_name": "test_ag", 
+        "label_column": "label",
+        "per_device_batch_size": 4,
+    }
+    
+    try:
+        automl.fit(
+            dataframe=train_dataset[feature_columns+["label"]],
+            label="label",
+            train_data=train_dataset[feature_columns+["label"]],
+            valid_data=valid_dataset[feature_columns+["label"]],
+            X_val=valid_dataset[feature_columns],
+            y_val=valid_dataset["label"],
+            estimator_list=["agtextpredictor"],
+            **automl_settings
+        )
+    except requests.exceptions.HTTPError:
+        return
+    
+    print("Begin to run inference on test set")
+    save_dir = automl_settings["custom_fix_args"]["output_dir"]
+    score = automl.model.estimator.evaluate(test_dataset)
+    print(f"Inference on test set complete, {metric}: {score}")
+
+
+if __name__ == "__main__":
+    test_ag_text_predictor()
\ No newline at end of file

From f9ca56ba06f5aa4bdb6978d2d51a216074e62181 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 09:51:35 -0400
Subject: [PATCH 06/50] new estimator, no test file

---
 test/test_agtextpredictor.py | 132 -----------------------------------
 1 file changed, 132 deletions(-)
 delete mode 100644 test/test_agtextpredictor.py

diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
deleted file mode 100644
index 0ddec3d1d3..0000000000
--- a/test/test_agtextpredictor.py
+++ /dev/null
@@ -1,132 +0,0 @@
-from flaml import AutoML
-import pandas as pd
-import requests
-import sklearn
-import numpy as np
-import os
-import sys
-import json
-from sklearn.model_selection import train_test_split
-os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
-
-def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
-    """
-    Returns default holdout_frac used in fit().
-    Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
-    Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
-    """
-    if num_train_rows < 5000:
-        holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
-    else:
-        holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
-
-    if hyperparameter_tune:
-        holdout_frac = min(0.2, holdout_frac * 2)  # We want to allocate more validation data for HPO to avoid overfitting
-
-    return holdout_frac
-    
-def test_ag_text_predictor():
-    if sys.version < "3.7":
-        # do not test on python3.6
-        return
-
-    seed = 123
-    metric = "roc_auc"
-    problem_type = "binary"
-    train_data = {
-        "sentence1": [
-            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
-            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
-            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
-            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
-            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
-            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
-            "The DVD-CCA then appealed to the state Supreme Court .",        
-        ],
-        "sentence2": [
-            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
-            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
-            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
-            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
-            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
-            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
-            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
-        ],
-        "numerical1":[1, 2, 3, 4, 5, 6, 7, 8],
-        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
-        "label": [1, 0, 1, 0, 1, 1, 0, 1],
-        "idx": [0, 1, 2, 3, 4, 5, 6, 7],
-    }
-    train_dataset = pd.DataFrame(train_data)
-
-    test_data = {
-            "sentence1": [
-                "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
-                "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
-                "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
-                "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
-            ],
-            "sentence2": [
-                "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
-                "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
-                "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
-                "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
-            ],
-            "numerical1":[3, 4, 5, 6],
-            "categorical1": ["b", "a", "a", "b"],
-            "label": [0, 1, 1, 0],
-            "idx": [8, 10, 11, 12],
-        }
-    test_dataset = pd.DataFrame(test_data)
-
-    # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
-    holdout_frac = default_holdout_frac(len(train_dataset), False)
-
-    _, valid_dataset = train_test_split(train_dataset,
-                                    test_size=holdout_frac,
-                                    random_state=np.random.RandomState(seed))
-    
-    feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
-    
-    automl = AutoML()
-    automl_settings = {
-        "gpu_per_trial": 0,
-        "max_iter": 3,
-        "time_budget": 50,
-        "task": "binary",
-        "metric": "roc_auc", 
-    }
-
-    automl_settings["custom_fix_args"] = {   
-        "output_dir": "test/data/output/",
-        "text_backbone": "electra_base",
-        "multimodal_fusion_strategy": "fuse_late", 
-        "dataset_name": "test_ag", 
-        "label_column": "label",
-        "per_device_batch_size": 4,
-    }
-    
-    try:
-        automl.fit(
-            dataframe=train_dataset[feature_columns+["label"]],
-            label="label",
-            train_data=train_dataset[feature_columns+["label"]],
-            valid_data=valid_dataset[feature_columns+["label"]],
-            X_val=valid_dataset[feature_columns],
-            y_val=valid_dataset["label"],
-            estimator_list=["agtextpredictor"],
-            **automl_settings
-        )
-    except requests.exceptions.HTTPError:
-        return
-    
-    print("Begin to run inference on test set")
-    save_dir = automl_settings["custom_fix_args"]["output_dir"]
-    score = automl.model.estimator.evaluate(test_dataset)
-    print(f"Inference on test set complete, {metric}: {score}")
-
-
-if __name__ == "__main__":
-    test_ag_text_predictor()
\ No newline at end of file

From fe0ecbb868f29b6aaea61ef8c5949d53281c2b66 Mon Sep 17 00:00:00 2001
From: Qiaochu Song <qcsong0818@gmail.com>
Date: Wed, 16 Mar 2022 11:48:33 -0400
Subject: [PATCH 07/50] Update automl.py

---
 flaml/automl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index cde608a942..8b53c70fd8 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -462,7 +462,7 @@ def custom_metric(
         def custom_metric(
             X_val, y_val, estimator, labels,
             X_train, y_train, weight_val=None, weight_train=None,
-            *args,
+            **args,
         ):
             from sklearn.metrics import log_loss
             import time

From 4a52ac7397bc493cdc05c82697628844eaa531df Mon Sep 17 00:00:00 2001
From: Qiaochu Song <qcsong0818@gmail.com>
Date: Wed, 16 Mar 2022 11:53:34 -0400
Subject: [PATCH 08/50] Update automl.py

---
 flaml/automl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index 8b53c70fd8..cde608a942 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -462,7 +462,7 @@ def custom_metric(
         def custom_metric(
             X_val, y_val, estimator, labels,
             X_train, y_train, weight_val=None, weight_train=None,
-            **args,
+            *args,
         ):
             from sklearn.metrics import log_loss
             import time

From 30cc834c50bc13a9ec38228a85788af76c0c3958 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 15:01:40 -0400
Subject: [PATCH 09/50] add test with gc, narrow down mxnet version

---
 flaml/model.py               |  81 ++++++++++-----------
 setup.py                     |   2 +-
 test/test_agtextpredictor.py | 132 +++++++++++++++++++++++++++++++++++
 3 files changed, 175 insertions(+), 40 deletions(-)
 create mode 100644 test/test_agtextpredictor.py

diff --git a/flaml/model.py b/flaml/model.py
index 54c2abf472..f8ed1aade3 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -1977,26 +1977,28 @@ def __init__(self, task="binary", **params,):
 
     @classmethod
     def search_space(cls, **params):
-        # Add the possible search space configs here, e.g. 'optimization.lr'
-        # reference: 
-        # https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
+        """
+        Add the possible search space configs here, e.g. 'optimization.lr'
+        reference:
+        https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
+        """
         search_space_dict = {
             'model.network.agg_net.mid_units': {
-                                                "domain": tune.choice(list(range(32, 129))),
-                                                "init_value": 128
-                                                },
+                "domain": tune.choice(list(range(32, 129))),
+                "init_value": 128,
+            },
             'optimization.lr': {
-                                "domain": tune.loguniform(lower=1E-5, upper=1E-4),
-                                "init_value": 1E-4,
-                                },
-            'optimization.wd':{
-                                "domain": tune.choice([1E-4, 1E-3, 1E-2]),
-                                "init_value":1E-4
-                                },
+                "domain": tune.loguniform(lower=1E-5, upper=1E-4),
+                "init_value": 1E-4,
+            },
+            'optimization.wd': {
+                "domain": tune.choice([1E-4, 1E-3, 1E-2]),
+                "init_value":1E-4,
+            },
             'optimization.warmup_portion': {
-                                 "domain": tune.choice([0.1, 0.2]), 
-                                "init_value":0.1, 
-                                },
+                "domain": tune.choice([0.1, 0.2]),
+                "init_value":0.1, 
+            },
         }
         return search_space_dict
 
@@ -2006,7 +2008,7 @@ def _init_fix_args(self, automl_fit_kwargs: dict=None):
         this includes:
             "output_dir",
             "text_backbone": "electra_base"
-            "multimodal_fusion_strategy":"fuse_late", 
+            "multimodal_fusion_strategy":"fuse_late",
         """
         fix_args = {}
         FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size",
@@ -2016,7 +2018,7 @@ def _init_fix_args(self, automl_fit_kwargs: dict=None):
                 key in FIX_ARGS_LIST
             ), "The specified key {} is not in the argument list: output_dir, label_column, dataset_name, text_backbone,\
                 multimodal_fusion_strategy".format(key)
-            
+
             fix_args[key] = value
 
         self.fix_args = fix_args
@@ -2033,15 +2035,15 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str):
         cfg = ag_text_presets.create(base_key)
         # NOTE: if the search_space() is modified, add new items or delete here too.
         TUNABLE_HP = set(['model.network.agg_net.mid_units',
-                      'optimization.batch_size',
-                      'optimization.layerwise_lr_decay',
-                      'optimization.lr',
-                      'optimization.nbest',
-                      'optimization.num_train_epochs',
-                      'optimization.per_device_batch_size',
-                      'optimization.wd',
-                      'optimization.warmup_portion',
-                     ])
+                          'optimization.batch_size',
+                          'optimization.layerwise_lr_decay',
+                          'optimization.lr',
+                          'optimization.nbest',
+                          'optimization.num_train_epochs',
+                          'optimization.per_device_batch_size',
+                          'optimization.wd',
+                          'optimization.warmup_portion',
+                          ])
         search_space = cfg['models']['MultimodalTextModel']['search_space']
         for key, value in self.params.items():
             if key in TUNABLE_HP:
@@ -2052,7 +2054,7 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str):
                     search_space[key] = value
             search_space['optimization.per_device_batch_size'] = self.fix_args['per_device_batch_size']
         return cfg
-   
+
     def _set_seed(self, seed):
         import random
         import mxnet as mx
@@ -2068,10 +2070,10 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         # the seed set in the bash script for ag experiment is 123
         seed = self.params.get("seed", 123)
         self._set_seed(seed)
-        
+
         # get backbone and fusion strategy
-        text_backbone=self.fix_args["text_backbone"]
-        multimodal_fusion_strategy=self.fix_args["multimodal_fusion_strategy"]
+        text_backbone = self.fix_args["text_backbone"]
+        multimodal_fusion_strategy = self.fix_args["multimodal_fusion_strategy"]
 
         # get & set the save dir, get the dataset info
         save_dir = self.fix_args["output_dir"]
@@ -2079,30 +2081,30 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         dataset_name = self.fix_args["dataset_name"]
         ag_model_save_dir = os.path.join(save_dir, f"{dataset_name}_ag_text_multimodal_{text_backbone}\
                                                     _{multimodal_fusion_strategy}_no_ensemble")
-        
+
         # set the of the hyperparameters
         self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy)
         PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression"}
         TASK_METRIC_MAPPING = {"multi": "acc", "binary": "roc_auc", "regression": "r2"}
-       
+
        # train the model
         start_time = time.time()
-        
-        self._model = self.estimator_class(path=save_dir,
+
+        self._model = self.estimator_class(path=ag_model_save_dir,
                                            label=label_column,
                                            problem_type=PROBLEM_TYPE_MAPPING[self._task],
                                            eval_metric=TASK_METRIC_MAPPING[self._task])
-        
+
         train_data = self._kwargs["train_data"]
 
         self._model.fit(train_data=train_data,
                         hyperparameters=self.hyperparameters,
-                        time_limit=budget, 
+                        time_limit=budget,
                         seed=seed)
- 
+
         training_time = time.time() - start_time
         return training_time
-    
+
     def predict(self, X):
         output = self._model.predict(self._kwargs["valid_data"], as_pandas=False)
         return output
@@ -2119,6 +2121,7 @@ def predict_proba(self, X, as_multiclass=True):
                 output = output[:, 1]
         return output
 
+
 class suppress_stdout_stderr(object):
     def __init__(self):
         # Open a pair of null files
diff --git a/setup.py b/setup.py
index 0f7b19f9c5..e0712f0eaa 100644
--- a/setup.py
+++ b/setup.py
@@ -65,7 +65,7 @@
             "mlflow",
         ],
         "autogluon": [
-            "mxnet<2.0.0",
+            "mxnet>=1.7.0,<2.0.0",
             "autogluon.text==0.4.0",
             "autogluon.features==0.4.0",
         ],
diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
new file mode 100644
index 0000000000..ccaf867075
--- /dev/null
+++ b/test/test_agtextpredictor.py
@@ -0,0 +1,132 @@
+from flaml import AutoML
+import pandas as pd
+import requests
+import gc
+import numpy as np
+import os
+import sys
+from sklearn.model_selection import train_test_split
+os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
+
+def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
+    """
+    Returns default holdout_frac used in fit().
+    Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
+    Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
+    """
+    if num_train_rows < 5000:
+        holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
+    else:
+        holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
+
+    if hyperparameter_tune:
+        holdout_frac = min(0.2, holdout_frac * 2)  # We want to allocate more validation data for HPO to avoid overfitting
+
+    return holdout_frac
+    
+def test_ag_text_predictor():
+    if sys.version < "3.7":
+        # do not test on python3.6
+        return
+
+    seed = 123
+    metric = "roc_auc"
+    problem_type = "binary"
+    train_data = {
+        "sentence1": [
+            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
+            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
+            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
+            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
+            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
+            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
+            "The DVD-CCA then appealed to the state Supreme Court .",        
+        ],
+        "sentence2": [
+            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
+            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
+            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
+            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
+            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
+            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
+            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
+        ],
+        "numerical1":[1, 2, 3, 4, 5, 6, 7, 8],
+        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
+        "label": [1, 0, 1, 0, 1, 1, 0, 1],
+        "idx": [0, 1, 2, 3, 4, 5, 6, 7],
+    }
+    train_dataset = pd.DataFrame(train_data)
+
+    test_data = {
+            "sentence1": [
+                "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
+                "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
+                "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
+                "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
+            ],
+            "sentence2": [
+                "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
+                "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
+                "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
+                "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
+            ],
+            "numerical1":[3, 4, 5, 6],
+            "categorical1": ["b", "a", "a", "b"],
+            "label": [0, 1, 1, 0],
+            "idx": [8, 10, 11, 12],
+        }
+    test_dataset = pd.DataFrame(test_data)
+
+    # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
+    holdout_frac = default_holdout_frac(len(train_dataset), False)
+
+    _, valid_dataset = train_test_split(train_dataset,
+                                    test_size=holdout_frac,
+                                    random_state=np.random.RandomState(seed))
+    
+    feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
+    
+    automl = AutoML()
+    automl_settings = {
+        "gpu_per_trial": 0,
+        "max_iter": 2,
+        "time_budget": 20,
+        "task": "binary",
+        "metric": "roc_auc", 
+    }
+
+    automl_settings["custom_fix_args"] = {   
+        "output_dir": "test/ag/output/",
+        "text_backbone": "electra_base",
+        "multimodal_fusion_strategy": "fuse_late", 
+        "dataset_name": "test_ag", 
+        "label_column": "label",
+        "per_device_batch_size": 2,
+    }
+    
+    try:
+        automl.fit(
+            dataframe=train_dataset[feature_columns+["label"]],
+            label="label",
+            train_data=train_dataset[feature_columns+["label"]],
+            valid_data=valid_dataset[feature_columns+["label"]],
+            X_val=valid_dataset[feature_columns],
+            y_val=valid_dataset["label"],
+            estimator_list=["agtextpredictor"],
+            **automl_settings
+        )
+    except requests.exceptions.HTTPError:
+        return
+    
+    print("Begin to run inference on test set")
+    score = automl.model.estimator.evaluate(test_dataset)
+    print(f"Inference on test set complete, {metric}: {score}")
+    del automl
+    gc.collect()
+
+
+if __name__ == "__main__":
+    test_ag_text_predictor()
\ No newline at end of file

From 6b75a73270cae10cc96be67af183a1c10607e105 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 16:17:12 -0400
Subject: [PATCH 10/50] skip test for py3.6 and win+py3.8, loosen mxnet version constraint

---
 setup.py                     | 2 +-
 test/test_agtextpredictor.py | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e0712f0eaa..0f7b19f9c5 100644
--- a/setup.py
+++ b/setup.py
@@ -65,7 +65,7 @@
             "mlflow",
         ],
         "autogluon": [
-            "mxnet>=1.7.0,<2.0.0",
+            "mxnet<2.0.0",
             "autogluon.text==0.4.0",
             "autogluon.features==0.4.0",
         ],
diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
index ccaf867075..d1801a10ee 100644
--- a/test/test_agtextpredictor.py
+++ b/test/test_agtextpredictor.py
@@ -5,6 +5,7 @@
 import numpy as np
 import os
 import sys
+import platform
 from sklearn.model_selection import train_test_split
 os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
 
@@ -28,6 +29,9 @@ def test_ag_text_predictor():
     if sys.version < "3.7":
         # do not test on python3.6
         return
+    elif platform.system() == "Windows" and sys.version_info.major == 3 and sys.version_info.minor == 8:
+        # do not test on windows with py3.8
+        return
 
     seed = 123
     metric = "roc_auc"

From d10945e785f168a50b9e55c86e2c60f8efb116dd Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 17:32:05 -0400
Subject: [PATCH 11/50] no ag on windows, remove mlflow dependency

---
 .github/workflows/python-package.yml |  4 ++--
 flaml/model.py                       | 36 +++++++++++++++-------------
 setup.py                             |  1 -
 test/test_agtextpredictor.py         |  4 +++-
 4 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 3404d1824b..3179338ffc 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -47,8 +47,8 @@ jobs:
         run: |
           pip install -e .[ray,forecast]
           pip install 'tensorboardX<=2.2'
-      - name: If python version > 3.6, install autogluon
-        if: matrix.python-version >= '3.7'
+      - name: If python version > 3.6 and not on windows, install autogluon
+        if: matrix.python-version >= '3.7' && (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest')
         run: |
           pip install -e .[autogluon]
       - name: Lint with flake8
diff --git a/flaml/model.py b/flaml/model.py
index f8ed1aade3..f383b8a00d 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -1983,19 +1983,19 @@ def search_space(cls, **params):
         https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
         """
         search_space_dict = {
-            'model.network.agg_net.mid_units': {
+            "model.network.agg_net.mid_units": {
                 "domain": tune.choice(list(range(32, 129))),
                 "init_value": 128,
             },
-            'optimization.lr': {
+            "optimization.lr": {
                 "domain": tune.loguniform(lower=1E-5, upper=1E-4),
                 "init_value": 1E-4,
             },
-            'optimization.wd': {
+            "optimization.wd": {
                 "domain": tune.choice([1E-4, 1E-3, 1E-2]),
                 "init_value":1E-4,
             },
-            'optimization.warmup_portion': {
+            "optimization.warmup_portion": {
                 "domain": tune.choice([0.1, 0.2]),
                 "init_value":0.1, 
             },
@@ -2012,7 +2012,7 @@ def _init_fix_args(self, automl_fit_kwargs: dict=None):
         """
         fix_args = {}
         FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size",
-                         "text_backbone", "multimodal_fusion_strategy", ]
+                         "text_backbone", "multimodal_fusion_strategy", "num_train_epochs", "batch_size"]
         for key, value in automl_fit_kwargs["custom_fix_args"].items():
             assert (
                 key in FIX_ARGS_LIST
@@ -2034,17 +2034,20 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str):
         base_key = f'{text_backbone}_{multimodal_fusion_strategy}'
         cfg = ag_text_presets.create(base_key)
         # NOTE: if the search_space() is modified, add new items or delete here too.
-        TUNABLE_HP = set(['model.network.agg_net.mid_units',
-                          'optimization.batch_size',
-                          'optimization.layerwise_lr_decay',
-                          'optimization.lr',
-                          'optimization.nbest',
-                          'optimization.num_train_epochs',
-                          'optimization.per_device_batch_size',
-                          'optimization.wd',
-                          'optimization.warmup_portion',
+        TUNABLE_HP = set(["model.network.agg_net.mid_units",
+                          "optimization.batch_size",
+                          "optimization.layerwise_lr_decay",
+                          "optimization.lr",
+                          "optimization.nbest",
+                          "optimization.num_train_epochs",
+                          "optimization.per_device_batch_size",
+                          "optimization.wd",
+                          "optimization.warmup_portion",
                           ])
-        search_space = cfg['models']['MultimodalTextModel']['search_space']
+        search_space = cfg["models"]["MultimodalTextModel"]["search_space"]
+        search_space["optimization.per_device_batch_size"] = self.fix_args.get("per_device_batch_size", 4)
+        search_space["optimization.num_train_epochs"] = self.fix_args.get("num_train_epochs", 10)
+        search_space["optimization.batch_size"] = self.fix_args.get("batch_size", 128)
         for key, value in self.params.items():
             if key in TUNABLE_HP:
                 # NOTE: FLAML uses np.float64 but AG uses float, need to transform
@@ -2052,7 +2055,8 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str):
                     search_space[key] = value.item()
                 else:
                     search_space[key] = value
-            search_space['optimization.per_device_batch_size'] = self.fix_args['per_device_batch_size']
+            
+
         return cfg
 
     def _set_seed(self, seed):
diff --git a/setup.py b/setup.py
index 0f7b19f9c5..24a622b01f 100644
--- a/setup.py
+++ b/setup.py
@@ -62,7 +62,6 @@
             "rouge_score",
             "hcrystalball==0.1.10",
             "seqeval",
-            "mlflow",
         ],
         "autogluon": [
             "mxnet<2.0.0",
diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
index d1801a10ee..4ae8293d91 100644
--- a/test/test_agtextpredictor.py
+++ b/test/test_agtextpredictor.py
@@ -108,7 +108,9 @@ def test_ag_text_predictor():
         "multimodal_fusion_strategy": "fuse_late", 
         "dataset_name": "test_ag", 
         "label_column": "label",
-        "per_device_batch_size": 2,
+        "per_device_batch_size": 4,
+        "num_train_epochs": 2,
+        "batch_size": 4,
     }
     
     try:

From 06f64b267f0f1c2e2423c468bb2d08f68bc4d2e9 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 17:32:50 -0400
Subject: [PATCH 12/50] skip agtextpredictor test on all Windows versions

---
 test/test_agtextpredictor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
index 4ae8293d91..61d6886c4a 100644
--- a/test/test_agtextpredictor.py
+++ b/test/test_agtextpredictor.py
@@ -29,7 +29,7 @@ def test_ag_text_predictor():
     if sys.version < "3.7":
         # do not test on python3.6
         return
-    elif platform.system() == "Windows" and sys.version_info.major == 3 and sys.version_info.minor == 8:
+    elif platform.system() == "Windows":
         # do not test on windows with py3.8
         return
 

From c9ff3d448fadf34ad3264bbb4b31442e6627ebd9 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 20:17:15 -0400
Subject: [PATCH 13/50] test with direct return

---
 flaml/model.py               |  6 ++----
 test/test_agtextpredictor.py | 32 +++++++++++++++++++-------------
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/flaml/model.py b/flaml/model.py
index f383b8a00d..6d65e69227 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2055,8 +2055,6 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str):
                     search_space[key] = value.item()
                 else:
                     search_space[key] = value
-            
-
         return cfg
 
     def _set_seed(self, seed):
@@ -2086,12 +2084,12 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         ag_model_save_dir = os.path.join(save_dir, f"{dataset_name}_ag_text_multimodal_{text_backbone}\
                                                     _{multimodal_fusion_strategy}_no_ensemble")
 
-        # set the of the hyperparameters
+        # set the hyperparameters
         self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy)
         PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression"}
         TASK_METRIC_MAPPING = {"multi": "acc", "binary": "roc_auc", "regression": "r2"}
 
-       # train the model
+        # train the model
         start_time = time.time()
 
         self._model = self.estimator_class(path=ag_model_save_dir,
diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
index 61d6886c4a..ee54e4e878 100644
--- a/test/test_agtextpredictor.py
+++ b/test/test_agtextpredictor.py
@@ -9,6 +9,7 @@
 from sklearn.model_selection import train_test_split
 os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
 
+
 def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
     """
     Returns default holdout_frac used in fit().
@@ -21,11 +22,15 @@ def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
         holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
 
     if hyperparameter_tune:
-        holdout_frac = min(0.2, holdout_frac * 2)  # We want to allocate more validation data for HPO to avoid overfitting
+        holdout_frac = min(0.2, holdout_frac * 2)  # to allocate more validation data for HPO to avoid overfitting
 
     return holdout_frac
-    
+
+
 def test_ag_text_predictor():
+    # DEBUGGING
+    return
+    # DEBUGGING
     if sys.version < "3.7":
         # do not test on python3.6
         return
@@ -57,7 +62,7 @@ def test_ag_text_predictor():
             "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
             "The DVD CCA appealed that decision to the U.S. Supreme Court .",
         ],
-        "numerical1":[1, 2, 3, 4, 5, 6, 7, 8],
+        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8],
         "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
         "label": [1, 0, 1, 0, 1, 1, 0, 1],
         "idx": [0, 1, 2, 3, 4, 5, 6, 7],
@@ -77,7 +82,7 @@ def test_ag_text_predictor():
                 "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
                 "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
             ],
-            "numerical1":[3, 4, 5, 6],
+            "numerical1": [3, 4, 5, 6],
             "categorical1": ["b", "a", "a", "b"],
             "label": [0, 1, 1, 0],
             "idx": [8, 10, 11, 12],
@@ -90,29 +95,29 @@ def test_ag_text_predictor():
     _, valid_dataset = train_test_split(train_dataset,
                                     test_size=holdout_frac,
                                     random_state=np.random.RandomState(seed))
-    
+
     feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
-    
+
     automl = AutoML()
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 20,
+        "time_budget": 50,
         "task": "binary",
-        "metric": "roc_auc", 
+        "metric": "roc_auc",
     }
 
-    automl_settings["custom_fix_args"] = {   
+    automl_settings["custom_fix_args"] = {
         "output_dir": "test/ag/output/",
         "text_backbone": "electra_base",
-        "multimodal_fusion_strategy": "fuse_late", 
-        "dataset_name": "test_ag", 
+        "multimodal_fusion_strategy": "fuse_late",
+        "dataset_name": "test_ag",
         "label_column": "label",
         "per_device_batch_size": 4,
         "num_train_epochs": 2,
         "batch_size": 4,
     }
-    
+
     try:
         automl.fit(
             dataframe=train_dataset[feature_columns+["label"]],
@@ -126,11 +131,12 @@ def test_ag_text_predictor():
         )
     except requests.exceptions.HTTPError:
         return
-    
+
     print("Begin to run inference on test set")
     score = automl.model.estimator.evaluate(test_dataset)
     print(f"Inference on test set complete, {metric}: {score}")
     del automl
+    # del mx
     gc.collect()
 
 

From e7b6f6d1da32fd67c462f3b123b62a943bd2ccfe Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 20:40:32 -0400
Subject: [PATCH 14/50] debug without new test

---
 test/test_agtextpredictor.py | 144 -----------------------------------
 1 file changed, 144 deletions(-)
 delete mode 100644 test/test_agtextpredictor.py

diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
deleted file mode 100644
index ee54e4e878..0000000000
--- a/test/test_agtextpredictor.py
+++ /dev/null
@@ -1,144 +0,0 @@
-from flaml import AutoML
-import pandas as pd
-import requests
-import gc
-import numpy as np
-import os
-import sys
-import platform
-from sklearn.model_selection import train_test_split
-os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
-
-
-def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
-    """
-    Returns default holdout_frac used in fit().
-    Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
-    Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
-    """
-    if num_train_rows < 5000:
-        holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
-    else:
-        holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
-
-    if hyperparameter_tune:
-        holdout_frac = min(0.2, holdout_frac * 2)  # to allocate more validation data for HPO to avoid overfitting
-
-    return holdout_frac
-
-
-def test_ag_text_predictor():
-    # DEBUGGING
-    return
-    # DEBUGGING
-    if sys.version < "3.7":
-        # do not test on python3.6
-        return
-    elif platform.system() == "Windows":
-        # do not test on windows with py3.8
-        return
-
-    seed = 123
-    metric = "roc_auc"
-    problem_type = "binary"
-    train_data = {
-        "sentence1": [
-            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
-            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
-            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
-            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
-            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
-            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
-            "The DVD-CCA then appealed to the state Supreme Court .",        
-        ],
-        "sentence2": [
-            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
-            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
-            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
-            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
-            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
-            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
-            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
-        ],
-        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8],
-        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
-        "label": [1, 0, 1, 0, 1, 1, 0, 1],
-        "idx": [0, 1, 2, 3, 4, 5, 6, 7],
-    }
-    train_dataset = pd.DataFrame(train_data)
-
-    test_data = {
-            "sentence1": [
-                "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
-                "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
-                "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
-                "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
-            ],
-            "sentence2": [
-                "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
-                "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
-                "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
-                "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
-            ],
-            "numerical1": [3, 4, 5, 6],
-            "categorical1": ["b", "a", "a", "b"],
-            "label": [0, 1, 1, 0],
-            "idx": [8, 10, 11, 12],
-        }
-    test_dataset = pd.DataFrame(test_data)
-
-    # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
-    holdout_frac = default_holdout_frac(len(train_dataset), False)
-
-    _, valid_dataset = train_test_split(train_dataset,
-                                    test_size=holdout_frac,
-                                    random_state=np.random.RandomState(seed))
-
-    feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
-
-    automl = AutoML()
-    automl_settings = {
-        "gpu_per_trial": 0,
-        "max_iter": 2,
-        "time_budget": 50,
-        "task": "binary",
-        "metric": "roc_auc",
-    }
-
-    automl_settings["custom_fix_args"] = {
-        "output_dir": "test/ag/output/",
-        "text_backbone": "electra_base",
-        "multimodal_fusion_strategy": "fuse_late",
-        "dataset_name": "test_ag",
-        "label_column": "label",
-        "per_device_batch_size": 4,
-        "num_train_epochs": 2,
-        "batch_size": 4,
-    }
-
-    try:
-        automl.fit(
-            dataframe=train_dataset[feature_columns+["label"]],
-            label="label",
-            train_data=train_dataset[feature_columns+["label"]],
-            valid_data=valid_dataset[feature_columns+["label"]],
-            X_val=valid_dataset[feature_columns],
-            y_val=valid_dataset["label"],
-            estimator_list=["agtextpredictor"],
-            **automl_settings
-        )
-    except requests.exceptions.HTTPError:
-        return
-
-    print("Begin to run inference on test set")
-    score = automl.model.estimator.evaluate(test_dataset)
-    print(f"Inference on test set complete, {metric}: {score}")
-    del automl
-    # del mx
-    gc.collect()
-
-
-if __name__ == "__main__":
-    test_ag_text_predictor()
\ No newline at end of file

From 2307b3737c95dcce7bd0b7fd958af925dfe8983b Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 21:08:59 -0400
Subject: [PATCH 15/50] w/o os.environ setting in new test, direct return

---
 test/test_agtextpredictor.py | 143 +++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100644 test/test_agtextpredictor.py

diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
new file mode 100644
index 0000000000..89348141ae
--- /dev/null
+++ b/test/test_agtextpredictor.py
@@ -0,0 +1,143 @@
+from flaml import AutoML
+import pandas as pd
+import requests
+import gc
+import numpy as np
+import os
+import sys
+import platform
+from sklearn.model_selection import train_test_split
+# os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
+
+
+def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
+    """
+    Returns default holdout_frac used in fit().
+    Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
+    Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
+    """
+    if num_train_rows < 5000:
+        holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
+    else:
+        holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
+
+    if hyperparameter_tune:
+        holdout_frac = min(0.2, holdout_frac * 2)  # to allocate more validation data for HPO to avoid overfitting
+
+    return holdout_frac
+
+
+def test_ag_text_predictor():
+    # DEBUG
+    return
+    # DEBUG
+    if sys.version < "3.7":
+        # do not test on python3.6
+        return
+    elif platform.system() == "Windows":
+        # do not test on windows with py3.8
+        return
+
+    seed = 123
+    metric = "roc_auc"
+    train_data = {
+        "sentence1": [
+            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
+            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
+            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
+            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
+            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
+            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
+            "The DVD-CCA then appealed to the state Supreme Court .",        
+        ],
+        "sentence2": [
+            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
+            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
+            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
+            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
+            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
+            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
+            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
+        ],
+        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8],
+        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
+        "label": [1, 0, 1, 0, 1, 1, 0, 1],
+        "idx": [0, 1, 2, 3, 4, 5, 6, 7],
+    }
+    train_dataset = pd.DataFrame(train_data)
+
+    test_data = {
+            "sentence1": [
+                "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
+                "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
+                "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
+                "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
+            ],
+            "sentence2": [
+                "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
+                "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
+                "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
+                "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
+            ],
+            "numerical1": [3, 4, 5, 6],
+            "categorical1": ["b", "a", "a", "b"],
+            "label": [0, 1, 1, 0],
+            "idx": [8, 10, 11, 12],
+        }
+    test_dataset = pd.DataFrame(test_data)
+
+    # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
+    holdout_frac = default_holdout_frac(len(train_dataset), False)
+
+    _, valid_dataset = train_test_split(train_dataset,
+                                    test_size=holdout_frac,
+                                    random_state=np.random.RandomState(seed))
+
+    feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
+
+    automl = AutoML()
+    automl_settings = {
+        "gpu_per_trial": 0,
+        "max_iter": 2,
+        "time_budget": 50,
+        "task": "binary",
+        "metric": "roc_auc",
+    }
+
+    automl_settings["custom_fix_args"] = {
+        "output_dir": "test/ag/output/",
+        "text_backbone": "electra_base",
+        "multimodal_fusion_strategy": "fuse_late",
+        "dataset_name": "test_ag",
+        "label_column": "label",
+        "per_device_batch_size": 4,
+        "num_train_epochs": 2,
+        "batch_size": 4,
+    }
+
+    try:
+        automl.fit(
+            dataframe=train_dataset[feature_columns+["label"]],
+            label="label",
+            train_data=train_dataset[feature_columns+["label"]],
+            valid_data=valid_dataset[feature_columns+["label"]],
+            X_val=valid_dataset[feature_columns],
+            y_val=valid_dataset["label"],
+            estimator_list=["agtextpredictor"],
+            **automl_settings
+        )
+    except requests.exceptions.HTTPError:
+        return
+
+    print("Begin to run inference on test set")
+    score = automl.model.estimator.evaluate(test_dataset)
+    print(f"Inference on test set complete, {metric}: {score}")
+    del automl
+    # del mx
+    gc.collect()
+
+
+if __name__ == "__main__":
+    test_ag_text_predictor()
\ No newline at end of file

From bf3203b93ffcf73a111e26c9b33f2870442ff74a Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 21:32:43 -0400
Subject: [PATCH 16/50] debug, import only in new test

---
 test/test_agtextpredictor.py | 242 +++++++++++++++++------------------
 1 file changed, 119 insertions(+), 123 deletions(-)

diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
index 89348141ae..f0cf02c8ba 100644
--- a/test/test_agtextpredictor.py
+++ b/test/test_agtextpredictor.py
@@ -10,134 +10,130 @@
 # os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
 
 
-def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
-    """
-    Returns default holdout_frac used in fit().
-    Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
-    Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
-    """
-    if num_train_rows < 5000:
-        holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
-    else:
-        holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
+# def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
+#     """
+#     Returns default holdout_frac used in fit().
+#     Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
+#     Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
+#     """
+#     if num_train_rows < 5000:
+#         holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
+#     else:
+#         holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
 
-    if hyperparameter_tune:
-        holdout_frac = min(0.2, holdout_frac * 2)  # to allocate more validation data for HPO to avoid overfitting
+#     if hyperparameter_tune:
+#         holdout_frac = min(0.2, holdout_frac * 2)  # to allocate more validation data for HPO to avoid overfitting
 
-    return holdout_frac
+#     return holdout_frac
 
 
 def test_ag_text_predictor():
     # DEBUG
     return
     # DEBUG
-    if sys.version < "3.7":
-        # do not test on python3.6
-        return
-    elif platform.system() == "Windows":
-        # do not test on windows with py3.8
-        return
-
-    seed = 123
-    metric = "roc_auc"
-    train_data = {
-        "sentence1": [
-            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
-            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
-            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
-            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
-            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
-            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
-            "The DVD-CCA then appealed to the state Supreme Court .",        
-        ],
-        "sentence2": [
-            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
-            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
-            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
-            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
-            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
-            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
-            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
-        ],
-        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8],
-        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
-        "label": [1, 0, 1, 0, 1, 1, 0, 1],
-        "idx": [0, 1, 2, 3, 4, 5, 6, 7],
-    }
-    train_dataset = pd.DataFrame(train_data)
-
-    test_data = {
-            "sentence1": [
-                "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
-                "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
-                "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
-                "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
-            ],
-            "sentence2": [
-                "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
-                "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
-                "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
-                "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
-            ],
-            "numerical1": [3, 4, 5, 6],
-            "categorical1": ["b", "a", "a", "b"],
-            "label": [0, 1, 1, 0],
-            "idx": [8, 10, 11, 12],
-        }
-    test_dataset = pd.DataFrame(test_data)
-
-    # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
-    holdout_frac = default_holdout_frac(len(train_dataset), False)
-
-    _, valid_dataset = train_test_split(train_dataset,
-                                    test_size=holdout_frac,
-                                    random_state=np.random.RandomState(seed))
-
-    feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
-
-    automl = AutoML()
-    automl_settings = {
-        "gpu_per_trial": 0,
-        "max_iter": 2,
-        "time_budget": 50,
-        "task": "binary",
-        "metric": "roc_auc",
-    }
-
-    automl_settings["custom_fix_args"] = {
-        "output_dir": "test/ag/output/",
-        "text_backbone": "electra_base",
-        "multimodal_fusion_strategy": "fuse_late",
-        "dataset_name": "test_ag",
-        "label_column": "label",
-        "per_device_batch_size": 4,
-        "num_train_epochs": 2,
-        "batch_size": 4,
-    }
-
-    try:
-        automl.fit(
-            dataframe=train_dataset[feature_columns+["label"]],
-            label="label",
-            train_data=train_dataset[feature_columns+["label"]],
-            valid_data=valid_dataset[feature_columns+["label"]],
-            X_val=valid_dataset[feature_columns],
-            y_val=valid_dataset["label"],
-            estimator_list=["agtextpredictor"],
-            **automl_settings
-        )
-    except requests.exceptions.HTTPError:
-        return
-
-    print("Begin to run inference on test set")
-    score = automl.model.estimator.evaluate(test_dataset)
-    print(f"Inference on test set complete, {metric}: {score}")
-    del automl
-    # del mx
-    gc.collect()
-
-
-if __name__ == "__main__":
-    test_ag_text_predictor()
\ No newline at end of file
+    # if sys.version < "3.7":
+    #     # do not test on python3.6
+    #     return
+    # elif platform.system() == "Windows":
+    #     # do not test on windows with py3.8
+    #     return
+
+    # seed = 123
+    # metric = "roc_auc"
+    # train_data = {
+    #     "sentence1": [
+    #         'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
+    #         "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
+    #         "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
+    #         "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
+    #         "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
+    #         "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
+    #         "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
+    #         "The DVD-CCA then appealed to the state Supreme Court .",        
+    #     ],
+    #     "sentence2": [
+    #         'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
+    #         "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
+    #         "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
+    #         "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
+    #         "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
+    #         "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
+    #         "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
+    #         "The DVD CCA appealed that decision to the U.S. Supreme Court .",
+    #     ],
+    #     "numerical1": [1, 2, 3, 4, 5, 6, 7, 8],
+    #     "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
+    #     "label": [1, 0, 1, 0, 1, 1, 0, 1],
+    #     "idx": [0, 1, 2, 3, 4, 5, 6, 7],
+    # }
+    # train_dataset = pd.DataFrame(train_data)
+
+    # test_data = {
+    #         "sentence1": [
+    #             "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
+    #             "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
+    #             "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
+    #             "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
+    #         ],
+    #         "sentence2": [
+    #             "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
+    #             "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
+    #             "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
+    #             "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
+    #         ],
+    #         "numerical1": [3, 4, 5, 6],
+    #         "categorical1": ["b", "a", "a", "b"],
+    #         "label": [0, 1, 1, 0],
+    #         "idx": [8, 10, 11, 12],
+    #     }
+    # test_dataset = pd.DataFrame(test_data)
+
+    # # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
+    # holdout_frac = default_holdout_frac(len(train_dataset), False)
+
+    # _, valid_dataset = train_test_split(train_dataset,
+    #                                 test_size=holdout_frac,
+    #                                 random_state=np.random.RandomState(seed))
+
+    # feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
+
+    # automl = AutoML()
+    # automl_settings = {
+    #     "gpu_per_trial": 0,
+    #     "max_iter": 2,
+    #     "time_budget": 50,
+    #     "task": "binary",
+    #     "metric": "roc_auc",
+    # }
+
+    # automl_settings["custom_fix_args"] = {
+    #     "output_dir": "test/ag/output/",
+    #     "text_backbone": "electra_base",
+    #     "multimodal_fusion_strategy": "fuse_late",
+    #     "dataset_name": "test_ag",
+    #     "label_column": "label",
+    #     "per_device_batch_size": 4,
+    #     "num_train_epochs": 2,
+    #     "batch_size": 4,
+    # }
+
+    # try:
+    #     automl.fit(
+    #         dataframe=train_dataset[feature_columns+["label"]],
+    #         label="label",
+    #         train_data=train_dataset[feature_columns+["label"]],
+    #         valid_data=valid_dataset[feature_columns+["label"]],
+    #         X_val=valid_dataset[feature_columns],
+    #         y_val=valid_dataset["label"],
+    #         estimator_list=["agtextpredictor"],
+    #         **automl_settings
+    #     )
+    # except requests.exceptions.HTTPError:
+    #     return
+
+    # print("Begin to run inference on test set")
+    # score = automl.model.estimator.evaluate(test_dataset)
+    # print(f"Inference on test set complete, {metric}: {score}")
+    # del automl
+    # # del mx
+    # gc.collect()
\ No newline at end of file

From 10c93b28cf2fb23555791d56c2e4fcd87ac41ed7 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 16 Mar 2022 23:42:33 -0400
Subject: [PATCH 17/50] move new test to automl

---
 test/automl/test_agtextpredictor.py | 135 +++++++++++++++++++++++++++
 test/test_agtextpredictor.py        | 139 ----------------------------
 2 files changed, 135 insertions(+), 139 deletions(-)
 create mode 100644 test/automl/test_agtextpredictor.py
 delete mode 100644 test/test_agtextpredictor.py

diff --git a/test/automl/test_agtextpredictor.py b/test/automl/test_agtextpredictor.py
new file mode 100644
index 0000000000..5f198db20b
--- /dev/null
+++ b/test/automl/test_agtextpredictor.py
@@ -0,0 +1,135 @@
+from flaml import AutoML
+import pandas as pd
+import requests
+import gc
+import numpy as np
+import os
+import sys
+import platform
+from sklearn.model_selection import train_test_split
+os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
+
+
+def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
+    """
+    Returns default holdout_frac used in fit().
+    Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
+    Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
+    """
+    if num_train_rows < 5000:
+        holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
+    else:
+        holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
+
+    if hyperparameter_tune:
+        holdout_frac = min(0.2, holdout_frac * 2)  # to allocate more validation data for HPO to avoid overfitting
+
+    return holdout_frac
+
+
+def test_ag_text_predictor():
+    # # DEBUG
+    # return
+    # # DEBUG
+    if sys.version < "3.7":
+        # do not test on python3.6
+        return
+    elif platform.system() == "Windows":
+        # do not test on windows with py3.8
+        return
+
+    seed = 123
+    metric = "roc_auc"
+    train_data = {
+        "sentence1": [
+            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
+            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
+            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
+            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
+            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
+            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
+            "The DVD-CCA then appealed to the state Supreme Court .",        
+        ],
+        "sentence2": [
+            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
+            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
+            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
+            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
+            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
+            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
+            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
+        ],
+        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8],
+        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
+        "label": [1, 0, 1, 0, 1, 1, 0, 1],
+        "idx": [0, 1, 2, 3, 4, 5, 6, 7],
+    }
+    train_dataset = pd.DataFrame(train_data)
+
+    test_data = {
+            "sentence1": [
+                "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
+                "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
+                "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
+                "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
+            ],
+            "sentence2": [
+                "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
+                "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
+                "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
+                "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
+            ],
+            "numerical1": [3, 4, 5, 6],
+            "categorical1": ["b", "a", "a", "b"],
+            "label": [0, 1, 1, 0],
+            "idx": [8, 10, 11, 12],
+        }
+    test_dataset = pd.DataFrame(test_data)
+
+    # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
+    holdout_frac = default_holdout_frac(len(train_dataset), False)
+
+    _, valid_dataset = train_test_split(train_dataset,
+                                    test_size=holdout_frac,
+                                    random_state=np.random.RandomState(seed))
+
+    feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
+
+    automl = AutoML()
+    automl_settings = {
+        "gpu_per_trial": 0,
+        "max_iter": 2,
+        "time_budget": 50,
+        "task": "binary",
+        "metric": "roc_auc",
+    }
+
+    automl_settings["custom_fix_args"] = {
+        "output_dir": "test/ag/output/",
+        "text_backbone": "electra_base",
+        "multimodal_fusion_strategy": "fuse_late",
+        "dataset_name": "test_ag",
+        "label_column": "label",
+        "per_device_batch_size": 4,
+        "num_train_epochs": 2,
+        "batch_size": 4,
+    }
+
+    automl.fit(
+        dataframe=train_dataset[feature_columns+["label"]],
+        label="label",
+        train_data=train_dataset[feature_columns+["label"]],
+        valid_data=valid_dataset[feature_columns+["label"]],
+        X_val=valid_dataset[feature_columns],
+        y_val=valid_dataset["label"],
+        estimator_list=["agtextpredictor"],
+        **automl_settings
+    )
+
+    print("Begin to run inference on test set")
+    score = automl.model.estimator.evaluate(test_dataset)
+    print(f"Inference on test set complete, {metric}: {score}")
+    del automl
+    gc.collect()
\ No newline at end of file
diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py
deleted file mode 100644
index f0cf02c8ba..0000000000
--- a/test/test_agtextpredictor.py
+++ /dev/null
@@ -1,139 +0,0 @@
-from flaml import AutoML
-import pandas as pd
-import requests
-import gc
-import numpy as np
-import os
-import sys
-import platform
-from sklearn.model_selection import train_test_split
-# os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
-
-
-# def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
-#     """
-#     Returns default holdout_frac used in fit().
-#     Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
-#     Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
-#     """
-#     if num_train_rows < 5000:
-#         holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
-#     else:
-#         holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
-
-#     if hyperparameter_tune:
-#         holdout_frac = min(0.2, holdout_frac * 2)  # to allocate more validation data for HPO to avoid overfitting
-
-#     return holdout_frac
-
-
-def test_ag_text_predictor():
-    # DEBUG
-    return
-    # DEBUG
-    # if sys.version < "3.7":
-    #     # do not test on python3.6
-    #     return
-    # elif platform.system() == "Windows":
-    #     # do not test on windows with py3.8
-    #     return
-
-    # seed = 123
-    # metric = "roc_auc"
-    # train_data = {
-    #     "sentence1": [
-    #         'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
-    #         "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
-    #         "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
-    #         "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
-    #         "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
-    #         "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
-    #         "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
-    #         "The DVD-CCA then appealed to the state Supreme Court .",        
-    #     ],
-    #     "sentence2": [
-    #         'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
-    #         "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
-    #         "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
-    #         "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
-    #         "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
-    #         "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
-    #         "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
-    #         "The DVD CCA appealed that decision to the U.S. Supreme Court .",
-    #     ],
-    #     "numerical1": [1, 2, 3, 4, 5, 6, 7, 8],
-    #     "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
-    #     "label": [1, 0, 1, 0, 1, 1, 0, 1],
-    #     "idx": [0, 1, 2, 3, 4, 5, 6, 7],
-    # }
-    # train_dataset = pd.DataFrame(train_data)
-
-    # test_data = {
-    #         "sentence1": [
-    #             "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
-    #             "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
-    #             "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
-    #             "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
-    #         ],
-    #         "sentence2": [
-    #             "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
-    #             "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
-    #             "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
-    #             "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
-    #         ],
-    #         "numerical1": [3, 4, 5, 6],
-    #         "categorical1": ["b", "a", "a", "b"],
-    #         "label": [0, 1, 1, 0],
-    #         "idx": [8, 10, 11, 12],
-    #     }
-    # test_dataset = pd.DataFrame(test_data)
-
-    # # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
-    # holdout_frac = default_holdout_frac(len(train_dataset), False)
-
-    # _, valid_dataset = train_test_split(train_dataset,
-    #                                 test_size=holdout_frac,
-    #                                 random_state=np.random.RandomState(seed))
-
-    # feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
-
-    # automl = AutoML()
-    # automl_settings = {
-    #     "gpu_per_trial": 0,
-    #     "max_iter": 2,
-    #     "time_budget": 50,
-    #     "task": "binary",
-    #     "metric": "roc_auc",
-    # }
-
-    # automl_settings["custom_fix_args"] = {
-    #     "output_dir": "test/ag/output/",
-    #     "text_backbone": "electra_base",
-    #     "multimodal_fusion_strategy": "fuse_late",
-    #     "dataset_name": "test_ag",
-    #     "label_column": "label",
-    #     "per_device_batch_size": 4,
-    #     "num_train_epochs": 2,
-    #     "batch_size": 4,
-    # }
-
-    # try:
-    #     automl.fit(
-    #         dataframe=train_dataset[feature_columns+["label"]],
-    #         label="label",
-    #         train_data=train_dataset[feature_columns+["label"]],
-    #         valid_data=valid_dataset[feature_columns+["label"]],
-    #         X_val=valid_dataset[feature_columns],
-    #         y_val=valid_dataset["label"],
-    #         estimator_list=["agtextpredictor"],
-    #         **automl_settings
-    #     )
-    # except requests.exceptions.HTTPError:
-    #     return
-
-    # print("Begin to run inference on test set")
-    # score = automl.model.estimator.evaluate(test_dataset)
-    # print(f"Inference on test set complete, {metric}: {score}")
-    # del automl
-    # # del mx
-    # gc.collect()
\ No newline at end of file

From 53b5f09f3b4c733d6952411edf30750a5e8761e1 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Thu, 17 Mar 2022 13:54:31 -0400
Subject: [PATCH 18/50] move new test to test/nlp/

---
 test/{automl => nlp}/test_agtextpredictor.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test/{automl => nlp}/test_agtextpredictor.py (100%)

diff --git a/test/automl/test_agtextpredictor.py b/test/nlp/test_agtextpredictor.py
similarity index 100%
rename from test/automl/test_agtextpredictor.py
rename to test/nlp/test_agtextpredictor.py

From ee3cacb67c7b957d048484074af9eef9e6b760b2 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Mon, 21 Mar 2022 19:13:18 -0400
Subject: [PATCH 19/50] pass data with X_train

---
 flaml/automl.py                  |   4 +-
 flaml/data.py                    |  12 +++-
 flaml/model.py                   | 110 +++++++++++++++++++------------
 test/nlp/test_agtextpredictor.py |  50 +++++++-------
 4 files changed, 108 insertions(+), 68 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index cde608a942..69627bf346 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -46,6 +46,7 @@
     REGRESSION,
     _is_nlp_task,
     NLG_TASKS,
+    _is_mm_task,
 )
 from . import tune
 from .training_log import training_log_reader, training_log_writer
@@ -974,7 +975,8 @@ def _validate_data(
                 "or all columns of X are integer ids (tokenized)"
             )
 
-        if issparse(X_train_all):
+        if issparse(X_train_all) or _is_mm_task(self._state.task):
+            # leave the preprocessing to the mm_estimator
             self._transformer = self._label_transformer = False
             self._X_train_all, self._y_train_all = X, y
         else:
diff --git a/flaml/data.py b/flaml/data.py
index 90162ff2f0..4f14582253 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -23,9 +23,11 @@
     SEQCLASSIFICATION,
     MULTICHOICECLASSIFICATION,
     TOKENCLASSIFICATION,
+    "mm_multi",
+    "mm_binary",
 )
 SEQREGRESSION = "seq-regression"
-REGRESSION = ("regression", SEQREGRESSION)
+REGRESSION = ("regression", "mm_regression", SEQREGRESSION)
 TS_FORECASTREGRESSION = (
     "forecast",
     "ts_forecast",
@@ -47,6 +49,14 @@
     TOKENCLASSIFICATION,
 )
 
+MM_TASKS = ("mm_binary", "mm_multi", "mm_regression")
+
+
+ ## ***** ADDED FOR MULTIMODAL *****
+def _is_mm_task(task):
+    return True if task in MM_TASKS else False
+## ***** END ADDED FOR MULTIMODAL *****
+
 
 def _is_nlp_task(task):
     if task in NLU_TASKS or task in NLG_TASKS:
diff --git a/flaml/model.py b/flaml/model.py
index 6d65e69227..263587c468 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -1970,10 +1970,10 @@ class AGTextPredictorEstimator(BaseEstimator):
     The class for tuning AutoGluon TextPredictor
     """
     def __init__(self, task="binary", **params,):
-        from autogluon.text.text_prediction.mx_predictor import MXTextPredictor
+        from autogluon.text import TextPredictor
 
         super().__init__(task, **params)
-        self.estimator_class = MXTextPredictor
+        self.estimator_class = TextPredictor
 
     @classmethod
     def search_space(cls, **params):
@@ -2011,51 +2011,70 @@ def _init_fix_args(self, automl_fit_kwargs: dict=None):
             "multimodal_fusion_strategy":"fuse_late",
         """
         fix_args = {}
-        FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size",
-                         "text_backbone", "multimodal_fusion_strategy", "num_train_epochs", "batch_size"]
+        FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size", "backend",
+                         "text_backbone", "multimodal_fusion_strategy", "num_train_epochs", "batch_size",]
         for key, value in automl_fit_kwargs["custom_fix_args"].items():
             assert (
                 key in FIX_ARGS_LIST
-            ), "The specified key {} is not in the argument list: output_dir, label_column, dataset_name, text_backbone,\
-                multimodal_fusion_strategy".format(key)
+            ), "The specified key {} is not in the argument list: output_dir, backend, label_column, dataset_name, text_backbone,\
+                multimodal_fusion_strategy, num_train_epochs, batch_size, per_device_batch_size".format(key)
 
             fix_args[key] = value
 
         self.fix_args = fix_args
 
     def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str):
-
         """"
         Ref:
         https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
         """
-        from autogluon.text.text_prediction.legacy_presets import ag_text_presets
-
-        base_key = f'{text_backbone}_{multimodal_fusion_strategy}'
-        cfg = ag_text_presets.create(base_key)
-        # NOTE: if the search_space() is modified, add new items or delete here too.
-        TUNABLE_HP = set(["model.network.agg_net.mid_units",
-                          "optimization.batch_size",
-                          "optimization.layerwise_lr_decay",
-                          "optimization.lr",
-                          "optimization.nbest",
-                          "optimization.num_train_epochs",
-                          "optimization.per_device_batch_size",
-                          "optimization.wd",
-                          "optimization.warmup_portion",
-                          ])
-        search_space = cfg["models"]["MultimodalTextModel"]["search_space"]
-        search_space["optimization.per_device_batch_size"] = self.fix_args.get("per_device_batch_size", 4)
-        search_space["optimization.num_train_epochs"] = self.fix_args.get("num_train_epochs", 10)
-        search_space["optimization.batch_size"] = self.fix_args.get("batch_size", 128)
-        for key, value in self.params.items():
-            if key in TUNABLE_HP:
-                # NOTE: FLAML uses np.float64 but AG uses float, need to transform
-                if isinstance(value, np.float64):
-                    search_space[key] = value.item()
-                else:
-                    search_space[key] = value
-        return cfg
+        if self.fix_args.get("backend", "pytorch") == "mxnet":
+            from autogluon.text.text_prediction.legacy_presets import ag_text_presets
+
+            base_key = f'{text_backbone}_{multimodal_fusion_strategy}'
+            cfg = ag_text_presets.create(base_key)
+            # NOTE: if the search_space() is modified, add new items or delete here too.
+            TUNABLE_HP = set(["model.network.agg_net.mid_units",
+                            "optimization.batch_size",
+                            "optimization.layerwise_lr_decay",
+                            "optimization.lr",
+                            "optimization.nbest",
+                            "optimization.num_train_epochs",
+                            "optimization.per_device_batch_size",
+                            "optimization.wd",
+                            "optimization.warmup_portion",
+                            ])
+            search_space = cfg["models"]["MultimodalTextModel"]["search_space"]
+            search_space["optimization.per_device_batch_size"] = self.fix_args.get("per_device_batch_size", 4)
+            search_space["optimization.num_train_epochs"] = self.fix_args.get("num_train_epochs", 10)
+            search_space["optimization.batch_size"] = self.fix_args.get("batch_size", 128)
+            for key, value in self.params.items():
+                if key in TUNABLE_HP:
+                    # NOTE: FLAML uses np.float64 but AG uses float, need to transform
+                    if isinstance(value, np.float64):
+                        search_space[key] = value.item()
+                    else:
+                        search_space[key] = value
+            return cfg
+        
+        else:
+            raise ValueError("the pytorch automm model is not supported. ")
+            # from autogluon.text.text_prediction.presets import get_text_preset
+
+            # cfg, overrides = get_text_preset("default")  # get preset for text+num+cat+fusion
+            # # TODO: set the search space for the auto_mm in AG 0.4.0
+            # cfg.hf_text.checkpoint_name = self.fix_args["hf_text.checkpoint_name"]
+            # # get search configs from self.params and set here
+            # TUNABLE_HP = []
+            # for key, value in self.params.items():
+            #     if key in TUNABLE_HP:
+            #         # NOTE: FLAML uses np.float64 but AG uses float, might need to transform
+            #         if isinstance(value, np.float64):
+            #             search_space[key] = value.item()
+            #         else:
+            #             search_space[key] = value
+            return cfg
+            
 
     def _set_seed(self, seed):
         import random
@@ -2086,8 +2105,8 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
 
         # set the hyperparameters
         self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy)
-        PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression"}
-        TASK_METRIC_MAPPING = {"multi": "acc", "binary": "roc_auc", "regression": "r2"}
+        PROBLEM_TYPE_MAPPING = {"mm_binary": "binary", "mm_multi": "multiclass", "mm_regression": "regression"}
+        TASK_METRIC_MAPPING = {"mm_multi": "acc", "mm_binary": "roc_auc", "mm_regression": "r2"}
 
         # train the model
         start_time = time.time()
@@ -2095,11 +2114,16 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         self._model = self.estimator_class(path=ag_model_save_dir,
                                            label=label_column,
                                            problem_type=PROBLEM_TYPE_MAPPING[self._task],
-                                           eval_metric=TASK_METRIC_MAPPING[self._task])
-
-        train_data = self._kwargs["train_data"]
-
+                                           eval_metric=TASK_METRIC_MAPPING[self._task],
+                                           backend=self.fix_args.get("backend", "pytorch"))
+
+        # train_data = self._kwargs["train_data"]
+        import pandas as pd
+        train_data = pd.concat([X_train, y_train], axis=1)
+        tuning_data = pd.concat([X_train, y_train], axis=1)
+        
         self._model.fit(train_data=train_data,
+                        tuning_data=kwargs.get("tuning_data", None),
                         hyperparameters=self.hyperparameters,
                         time_limit=budget,
                         seed=seed)
@@ -2108,7 +2132,7 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         return training_time
 
     def predict(self, X):
-        output = self._model.predict(self._kwargs["valid_data"], as_pandas=False)
+        output = self._model.predict(X, as_pandas=False)
         return output
 
     def predict_proba(self, X, as_multiclass=True):
@@ -2117,9 +2141,9 @@ def predict_proba(self, X, as_multiclass=True):
             self._task in CLASSIFICATION
         ), "predict_proba() only for classification tasks."
 
-        output = self._model.predict_proba(self._kwargs["valid_data"], as_pandas=False)
+        output = self._model.predict_proba(X, as_pandas=False)
         if not as_multiclass:
-            if self._task == "binary":
+            if self._task == "mm_binary":
                 output = output[:, 1]
         return output
 
diff --git a/test/nlp/test_agtextpredictor.py b/test/nlp/test_agtextpredictor.py
index 5f198db20b..738610a882 100644
--- a/test/nlp/test_agtextpredictor.py
+++ b/test/nlp/test_agtextpredictor.py
@@ -9,7 +9,6 @@
 from sklearn.model_selection import train_test_split
 os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
 
-
 def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
     """
     Returns default holdout_frac used in fit().
@@ -26,11 +25,7 @@ def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
 
     return holdout_frac
 
-
-def test_ag_text_predictor():
-    # # DEBUG
-    # return
-    # # DEBUG
+def test_ag_mx_textpredictor():
     if sys.version < "3.7":
         # do not test on python3.6
         return
@@ -39,7 +34,7 @@ def test_ag_text_predictor():
         return
 
     seed = 123
-    metric = "roc_auc"
+    metric = "accuracy"
     train_data = {
         "sentence1": [
             'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
@@ -49,7 +44,11 @@ def test_ag_text_predictor():
             "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
             "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
             "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
-            "The DVD-CCA then appealed to the state Supreme Court .",        
+            "The DVD-CCA then appealed to the state Supreme Court .",
+            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
+            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
+            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
+            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
         ],
         "sentence2": [
             'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
@@ -60,11 +59,14 @@ def test_ag_text_predictor():
             "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
             "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
             "The DVD CCA appealed that decision to the U.S. Supreme Court .",
+            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
+            "The DVD-CCA then appealed to the state Supreme Court .",  
+            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
+            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
         ],
-        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8],
-        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"],
-        "label": [1, 0, 1, 0, 1, 1, 0, 1],
-        "idx": [0, 1, 2, 3, 4, 5, 6, 7],
+        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a", "a", "b", "a", "a"],
+        "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2, 0, 1],
     }
     train_dataset = pd.DataFrame(train_data)
 
@@ -83,9 +85,8 @@ def test_ag_text_predictor():
             ],
             "numerical1": [3, 4, 5, 6],
             "categorical1": ["b", "a", "a", "b"],
-            "label": [0, 1, 1, 0],
-            "idx": [8, 10, 11, 12],
-        }
+            "label": [0, 1, 1, 2],
+    }
     test_dataset = pd.DataFrame(test_data)
 
     # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
@@ -94,7 +95,7 @@ def test_ag_text_predictor():
     _, valid_dataset = train_test_split(train_dataset,
                                     test_size=holdout_frac,
                                     random_state=np.random.RandomState(seed))
-
+    
     feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
 
     automl = AutoML()
@@ -102,12 +103,15 @@ def test_ag_text_predictor():
         "gpu_per_trial": 0,
         "max_iter": 2,
         "time_budget": 50,
-        "task": "binary",
-        "metric": "roc_auc",
+        "task": "mm_multi",
+        "metric": "accuracy",
     }
 
     automl_settings["custom_fix_args"] = {
         "output_dir": "test/ag/output/",
+        # "backend": "pytorch",
+        "backend": "mxnet",
+        # "hf_text.checkpoint_name": "google/electra-base-discriminator",
         "text_backbone": "electra_base",
         "multimodal_fusion_strategy": "fuse_late",
         "dataset_name": "test_ag",
@@ -118,17 +122,17 @@ def test_ag_text_predictor():
     }
 
     automl.fit(
-        dataframe=train_dataset[feature_columns+["label"]],
-        label="label",
-        train_data=train_dataset[feature_columns+["label"]],
-        valid_data=valid_dataset[feature_columns+["label"]],
+        X_train=train_dataset[feature_columns],
+        y_train=train_dataset["label"],
         X_val=valid_dataset[feature_columns],
         y_val=valid_dataset["label"],
+        eval_method="holdout",
+        auto_augment=False,
         estimator_list=["agtextpredictor"],
         **automl_settings
     )
 
-    print("Begin to run inference on test set")
+    print("Try to run inference on test set")
     score = automl.model.estimator.evaluate(test_dataset)
     print(f"Inference on test set complete, {metric}: {score}")
     del automl

From 8096a89ce22c66a9559d8e603f38509bc25711df Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Thu, 24 Mar 2022 16:13:42 -0400
Subject: [PATCH 20/50] pr fixes, debugging

---
 flaml/automl.py                  |   3 +-
 flaml/data.py                    |  39 +++----
 flaml/ml.py                      |   6 +-
 flaml/model.py                   | 182 +++++++++++--------------------
 flaml/nlp/utils.py               |  85 +++++++++++++++
 test/nlp/test_agtextpredictor.py |  23 ++--
 6 files changed, 180 insertions(+), 158 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index 69627bf346..f83dd897ba 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -46,7 +46,6 @@
     REGRESSION,
     _is_nlp_task,
     NLG_TASKS,
-    _is_mm_task,
 )
 from . import tune
 from .training_log import training_log_reader, training_log_writer
@@ -975,7 +974,7 @@ def _validate_data(
                 "or all columns of X are integer ids (tokenized)"
             )
 
-        if issparse(X_train_all) or _is_mm_task(self._state.task):
+        if issparse(X_train_all):
             # leave the preprocessing to the mm_estimator
             self._transformer = self._label_transformer = False
             self._X_train_all, self._y_train_all = X, y
diff --git a/flaml/data.py b/flaml/data.py
index 4f14582253..30968b6012 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -23,11 +23,9 @@
     SEQCLASSIFICATION,
     MULTICHOICECLASSIFICATION,
     TOKENCLASSIFICATION,
-    "mm_multi",
-    "mm_binary",
 )
 SEQREGRESSION = "seq-regression"
-REGRESSION = ("regression", "mm_regression", SEQREGRESSION)
+REGRESSION = ("regression", SEQREGRESSION)
 TS_FORECASTREGRESSION = (
     "forecast",
     "ts_forecast",
@@ -49,14 +47,6 @@
     TOKENCLASSIFICATION,
 )
 
-MM_TASKS = ("mm_binary", "mm_multi", "mm_regression")
-
-
- ## ***** ADDED FOR MULTIMODAL *****
-def _is_mm_task(task):
-    return True if task in MM_TASKS else False
-## ***** END ADDED FOR MULTIMODAL *****
-
 
 def _is_nlp_task(task):
     if task in NLU_TASKS or task in NLG_TASKS:
@@ -282,7 +272,8 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
         elif isinstance(X, DataFrame):
             X = X.copy()
             n = X.shape[0]
-            cat_columns, num_columns, datetime_columns = [], [], []
+            # NOTE: add str_columns here
+            str_columns, cat_columns, num_columns, datetime_columns = [], [], [], []
             drop = False
             if task in TS_FORECAST:
                 X = X.rename(columns={X.columns[0]: TS_TIMESTAMP_COL})
@@ -292,13 +283,17 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
             for column in X.columns:
                 # sklearn\utils\validation.py needs int/float values
                 if X[column].dtype.name in ("object", "category"):
-                    if (
-                        X[column].nunique() == 1
-                        or X[column].nunique(dropna=True)
-                        == n - X[column].isnull().sum()
-                    ):
+                    if X[column].nunique() == 1:
                         X.drop(columns=column, inplace=True)
                         drop = True
+                    elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.1):
+                    # NOTE: the 10% distinct-value threshold above separates free-text (str) from categorical columns.
+                    # To require instead that every non-NaN entry be distinct (no threshold),
+                    # delete the elif line above and uncomment the one below:
+                    # elif X[column].nunique(dropna=True) == n - X[column].isnull().sum():
+                        # NOTE: str column detected here; fill missing values with ""
+                        X[column] = X[column].fillna("")
+                        str_columns.append(column)
                     elif X[column].dtype.name == "category":
                         current_categories = X[column].cat.categories
                         if "__NAN__" not in current_categories:
@@ -340,7 +335,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
                         del tmp_dt
                     X[column] = X[column].fillna(np.nan)
                     num_columns.append(column)
-            X = X[cat_columns + num_columns]
+            X = X[str_columns + cat_columns + num_columns]
             if task in TS_FORECAST:
                 X.insert(0, TS_TIMESTAMP_COL, ds_col)
             if cat_columns:
@@ -369,7 +364,8 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
                     ]
                 )
                 X[num_columns] = self.transformer.fit_transform(X_num)
-            self._cat_columns, self._num_columns, self._datetime_columns = (
+            self.str_columns, self._cat_columns, self._num_columns, self._datetime_columns = (
+                str_columns,
                 cat_columns,
                 num_columns,
                 datetime_columns,
@@ -410,7 +406,8 @@ def transform(self, X: Union[DataFrame, np.array]):
             if len(self._str_columns) > 0:
                 X[self._str_columns] = X[self._str_columns].astype("string")
         elif isinstance(X, DataFrame):
-            cat_columns, num_columns, datetime_columns = (
+            str_columns, cat_columns, num_columns, datetime_columns = (
+                self.str_columns,
                 self._cat_columns,
                 self._num_columns,
                 self._datetime_columns,
@@ -436,7 +433,7 @@ def transform(self, X: Union[DataFrame, np.array]):
                         X[new_col_name] = new_col_value
                 X[column] = X[column].map(datetime.toordinal)
                 del tmp_dt
-            X = X[cat_columns + num_columns].copy()
+            X = X[str_columns + cat_columns + num_columns].copy()
             if self._task in TS_FORECAST:
                 X.insert(0, TS_TIMESTAMP_COL, ds_col)
             for column in cat_columns:
diff --git a/flaml/ml.py b/flaml/ml.py
index 55256d3de2..34ef0bdabc 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -37,7 +37,7 @@
     ARIMA,
     SARIMAX,
     TransformersEstimator,
-    AGTextPredictorEstimator,
+    MultiModalEstimator,
 )
 from .data import CLASSIFICATION, group_counts, TS_FORECAST, TS_VALUE_COL
 import logging
@@ -122,8 +122,8 @@ def get_estimator_class(task, estimator_name):
         estimator_class = SARIMAX
     elif estimator_name == "transformer":
         estimator_class = TransformersEstimator
-    elif estimator_name == "agtextpredictor":
-        estimator_class = AGTextPredictorEstimator
+    elif estimator_name == "multimodal":
+        estimator_class = MultiModalEstimator
     else:
         raise ValueError(
             estimator_name + " is not a built-in learner. "
diff --git a/flaml/model.py b/flaml/model.py
index 263587c468..a980dceb41 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -1965,16 +1965,10 @@ class XGBoostLimitDepth_TS(TS_SKLearn):
     base_class = XGBoostLimitDepthEstimator
 
 
-class AGTextPredictorEstimator(BaseEstimator):
+class MultiModalEstimator(BaseEstimator):
     """
     The class for tuning AutoGluon TextPredictor
     """
-    def __init__(self, task="binary", **params,):
-        from autogluon.text import TextPredictor
-
-        super().__init__(task, **params)
-        self.estimator_class = TextPredictor
-
     @classmethod
     def search_space(cls, **params):
         """
@@ -1982,6 +1976,7 @@ def search_space(cls, **params):
         reference:
         https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
         """
+        # TODO: expand the search space
         search_space_dict = {
             "model.network.agg_net.mid_units": {
                 "domain": tune.choice(list(range(32, 129))),
@@ -1993,138 +1988,91 @@ def search_space(cls, **params):
             },
             "optimization.wd": {
                 "domain": tune.choice([1E-4, 1E-3, 1E-2]),
-                "init_value":1E-4,
+                "init_value": 1E-4,
             },
             "optimization.warmup_portion": {
                 "domain": tune.choice([0.1, 0.2]),
-                "init_value":0.1, 
+                "init_value": 0.1, 
+            },
+            "optimization.layerwise_lr_decay": {
+                "domain": tune.choice([0.8, 0.9]),
+                "init_value": 0.8,
+            },
+            "optimization.nbest": {
+                "domain": tune.choice([2, 3, 4,]),
+                "init_value": 3,
+            },
+            "optimization.num_train_epochs": {
+                "domain": tune.choice([5, 10, 15,]),
+                "init_value": 10,
+            },
+            "optimization.per_device_batch_size": {
+                "domain": tune.choice([2, 4, 8,]),
+                "init_value": 10,
+            },
+            "optimization.batch_size": {
+                "domain": tune.choice([32, 64, 128,]),
+                "init_value": 128,
             },
         }
         return search_space_dict
 
-    def _init_fix_args(self, automl_fit_kwargs: dict=None):
-        """
-        Save the customed fix args here
-        this includes:
-            "output_dir",
-            "text_backbone": "electra_base"
-            "multimodal_fusion_strategy":"fuse_late",
-        """
-        fix_args = {}
-        FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size", "backend",
-                         "text_backbone", "multimodal_fusion_strategy", "num_train_epochs", "batch_size",]
-        for key, value in automl_fit_kwargs["custom_fix_args"].items():
-            assert (
-                key in FIX_ARGS_LIST
-            ), "The specified key {} is not in the argument list: output_dir, backend, label_column, dataset_name, text_backbone,\
-                multimodal_fusion_strategy, num_train_epochs, batch_size, per_device_batch_size".format(key)
+    def _init_ag_args(self, automl_fit_kwargs: dict = None):
+        from .nlp.utils import AGArgs
 
-            fix_args[key] = value
-
-        self.fix_args = fix_args
-
-    def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str):
-        """"
-        Ref:
-        https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
-        """
-        if self.fix_args.get("backend", "pytorch") == "mxnet":
-            from autogluon.text.text_prediction.legacy_presets import ag_text_presets
-
-            base_key = f'{text_backbone}_{multimodal_fusion_strategy}'
-            cfg = ag_text_presets.create(base_key)
-            # NOTE: if the search_space() is modified, add new items or delete here too.
-            TUNABLE_HP = set(["model.network.agg_net.mid_units",
-                            "optimization.batch_size",
-                            "optimization.layerwise_lr_decay",
-                            "optimization.lr",
-                            "optimization.nbest",
-                            "optimization.num_train_epochs",
-                            "optimization.per_device_batch_size",
-                            "optimization.wd",
-                            "optimization.warmup_portion",
-                            ])
-            search_space = cfg["models"]["MultimodalTextModel"]["search_space"]
-            search_space["optimization.per_device_batch_size"] = self.fix_args.get("per_device_batch_size", 4)
-            search_space["optimization.num_train_epochs"] = self.fix_args.get("num_train_epochs", 10)
-            search_space["optimization.batch_size"] = self.fix_args.get("batch_size", 128)
-            for key, value in self.params.items():
-                if key in TUNABLE_HP:
-                    # NOTE: FLAML uses np.float64 but AG uses float, need to transform
-                    if isinstance(value, np.float64):
-                        search_space[key] = value.item()
-                    else:
-                        search_space[key] = value
-            return cfg
-        
-        else:
-            raise ValueError("the pytorch automm model is not supported. ")
-            # from autogluon.text.text_prediction.presets import get_text_preset
-
-            # cfg, overrides = get_text_preset("default")  # get preset for text+num+cat+fusion
-            # # TODO: set the search space for the auto_mm in AG 0.4.0
-            # cfg.hf_text.checkpoint_name = self.fix_args["hf_text.checkpoint_name"]
-            # # get search configs from self.params and set here
-            # TUNABLE_HP = []
-            # for key, value in self.params.items():
-            #     if key in TUNABLE_HP:
-            #         # NOTE: FLAML uses np.float64 but AG uses float, might need to transform
-            #         if isinstance(value, np.float64):
-            #             search_space[key] = value.item()
-            #         else:
-            #             search_space[key] = value
-            return cfg
-            
+        ag_args = AGArgs()
+        for key, val in automl_fit_kwargs["ag_args"].items():
+            assert (
+                key in ag_args.__dict__
+            ), "The specified key {} is not in the argument list of flaml.nlp.utils::AGArgs".format(
+                key
+            )
+            setattr(ag_args, key, val)
+        self.ag_args = ag_args
 
     def _set_seed(self, seed):
         import random
         import mxnet as mx
-        import torch as th
-        th.manual_seed(seed)
+        # import torch as th
+        # th.manual_seed(seed)
         mx.random.seed(seed)
         np.random.seed(seed)
         random.seed(seed)
 
     def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
+        from autogluon.text import TextPredictor
+
         self._kwargs = kwargs
-        self._init_fix_args(kwargs)
-        # the seed set in the bash script for ag experiment is 123
-        seed = self.params.get("seed", 123)
+        self._init_ag_args(kwargs)
+        seed = self._kwargs.get("seed", 123)
         self._set_seed(seed)
 
-        # get backbone and fusion strategy
-        text_backbone = self.fix_args["text_backbone"]
-        multimodal_fusion_strategy = self.fix_args["multimodal_fusion_strategy"]
-
-        # get & set the save dir, get the dataset info
-        save_dir = self.fix_args["output_dir"]
-        label_column = self.fix_args["label_column"]
-        dataset_name = self.fix_args["dataset_name"]
-        ag_model_save_dir = os.path.join(save_dir, f"{dataset_name}_ag_text_multimodal_{text_backbone}\
-                                                    _{multimodal_fusion_strategy}_no_ensemble")
-
-        # set the hyperparameters
-        self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy)
-        PROBLEM_TYPE_MAPPING = {"mm_binary": "binary", "mm_multi": "multiclass", "mm_regression": "regression"}
-        TASK_METRIC_MAPPING = {"mm_multi": "acc", "mm_binary": "roc_auc", "mm_regression": "r2"}
+        assert (self.ag_args.backend == "mxnet"), "the pytorch automm model is not supported. "
+        # get & set the hyperparameters, update with self.params
+        hyperparameters = self.ag_args.get_presets()
+        search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"]
+        for key, value in self.params.items():
+            # NOTE: FLAML uses np.float64 but AG uses float, need to transform
+            if isinstance(value, np.float64):
+                search_space[key] = value.item()
+            else:
+                search_space[key] = value
 
+        PROBLEM_TYPE_MAPPING = {"binary": "binary",
+                                "multi": "multiclass",
+                                "regression": "regression"
+        }
         # train the model
         start_time = time.time()
+        self._model = TextPredictor(path=self.ag_args.output_dir,
+                                    label="label",
+                                    problem_type=PROBLEM_TYPE_MAPPING[self._task],
+                                    eval_metric=kwargs["metric"],
+                                    backend=self.ag_args.backend)
 
-        self._model = self.estimator_class(path=ag_model_save_dir,
-                                           label=label_column,
-                                           problem_type=PROBLEM_TYPE_MAPPING[self._task],
-                                           eval_metric=TASK_METRIC_MAPPING[self._task],
-                                           backend=self.fix_args.get("backend", "pytorch"))
-
-        # train_data = self._kwargs["train_data"]
-        import pandas as pd
-        train_data = pd.concat([X_train, y_train], axis=1)
-        tuning_data = pd.concat([X_train, y_train], axis=1)
-        
+        train_data = TransformersEstimator._join(X_train, y_train)
         self._model.fit(train_data=train_data,
-                        tuning_data=kwargs.get("tuning_data", None),
-                        hyperparameters=self.hyperparameters,
+                        hyperparameters=hyperparameters,
                         time_limit=budget,
                         seed=seed)
 
@@ -2135,16 +2083,12 @@ def predict(self, X):
         output = self._model.predict(X, as_pandas=False)
         return output
 
-    def predict_proba(self, X, as_multiclass=True):
+    def predict_proba(self, X):
         # only works for classification tasks
         assert (
             self._task in CLASSIFICATION
         ), "predict_proba() only for classification tasks."
-
         output = self._model.predict_proba(X, as_pandas=False)
-        if not as_multiclass:
-            if self._task == "mm_binary":
-                output = output[:, 1]
         return output
 
 
diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index 22bd25faaa..98a35ee49a 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -573,3 +573,88 @@ def load_args():
             )
         console_args, unknown = arg_parser.parse_known_args()
         return console_args
+
+
+@dataclass
+class AGArgs:
+    """
+    The Autogluon configurations
+    Args:
+        output_dir (str): data root directory for outputing the log and intermediate data, model.
+        backend (str, optional, defaults to "mxnet"): currently only support to mxnet.
+        text_backbone (str, optional, defaults to "electra_base"): the text backbone model.
+        multimodal_fusion_strategy (str, optional, defaults to "fuse_late"): the fuse strategy.
+    """
+    from autogluon.text.text_prediction.legacy_presets import ag_text_presets
+
+    output_dir: str = field(
+        default="data/mm/output/", metadata={"help": "data dir", "required": True}
+    )
+    backend: str = field(default="mxnet", metadata={"help": "the backend of the multimodal model"})
+    text_backbone: str = field(default="electra_base", metadata={"help": "text backbone model"})
+    multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"})
+    # TODO: determine whether to tune these HPs 
+    # per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"})
+    # num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"})
+    # batch_size: int = field(default=128,  metadata={"help": "batch size"})
+    
+
+    def get_presets(self):
+        """
+        Get the preset using the AGArgs.
+        {'models': {'MultimodalTextModel': {'backend': 'gluonnlp_v0',
+                                    'search_space': {'model.backbone.name': 'google_electra_small',
+                                                    'model.network.agg_net.agg_type': 'concat',
+                                                    'model.network.agg_net.mid_units': 128,  # [in HPO example]
+                                                    'model.network.aggregate_categorical': True,
+                                                    'model.use_avg_nbest': True,
+                                                    'optimization.batch_size': 128,
+                                                    'optimization.layerwise_lr_decay': 0.8,
+                                                    'optimization.lr': Categorical[0.0001],
+                                                    'optimization.nbest': 3,
+                                                    'optimization.num_train_epochs': 10,
+                                                    'optimization.per_device_batch_size': 8,
+                                                    'optimization.wd': 0.0001,
+                                                    'optimization.warmup_portion': 0.1,  # [in HPO example]
+                                                    'preprocessing.categorical.convert_to_text': False,
+                                                    'preprocessing.numerical.convert_to_text': False}}},
+        'tune_kwargs': {'num_trials': 1,
+                        'scheduler_options': None,
+                        'search_options': None,
+                        'search_strategy': 'local',
+                        'searcher': 'random'}}
+        Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html
+        Return:
+            hyperparameters: a Dict of the hyperparameter settings.
+        """
+        from autogluon.text.text_prediction.legacy_presets import ag_text_presets
+
+        base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}'
+        hyperparameters = ag_text_presets.create(base_key)
+        search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"]
+        # TODO: set anything that would like to be set via ag_args here
+
+        return hyperparameters
+
+
+    @staticmethod
+    def load_args():
+        from dataclasses import fields
+
+        arg_parser = argparse.ArgumentParser()
+        for each_field in fields(AGArgs):
+            print(each_field)
+            arg_parser.add_argument(
+                "--" + each_field.name,
+                type=each_field.type,
+                help=each_field.metadata["help"],
+                required=each_field.metadata["required"]
+                if "required" in each_field.metadata
+                else False,
+                choices=each_field.metadata["choices"]
+                if "choices" in each_field.metadata
+                else None,
+                default=each_field.default,
+            )
+        console_args, unknown = arg_parser.parse_known_args()
+        return console_args
diff --git a/test/nlp/test_agtextpredictor.py b/test/nlp/test_agtextpredictor.py
index 738610a882..c8fe0bc66a 100644
--- a/test/nlp/test_agtextpredictor.py
+++ b/test/nlp/test_agtextpredictor.py
@@ -25,7 +25,7 @@ def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
 
     return holdout_frac
 
-def test_ag_mx_textpredictor():
+def test_multimodalestimator():
     if sys.version < "3.7":
         # do not test on python3.6
         return
@@ -103,22 +103,15 @@ def test_ag_mx_textpredictor():
         "gpu_per_trial": 0,
         "max_iter": 2,
         "time_budget": 50,
-        "task": "mm_multi",
+        "task": "classification",
         "metric": "accuracy",
     }
-
-    automl_settings["custom_fix_args"] = {
+    # TODO: modify and double check
+    automl_settings["ag_args"] = {
         "output_dir": "test/ag/output/",
-        # "backend": "pytorch",
         "backend": "mxnet",
-        # "hf_text.checkpoint_name": "google/electra-base-discriminator",
         "text_backbone": "electra_base",
         "multimodal_fusion_strategy": "fuse_late",
-        "dataset_name": "test_ag",
-        "label_column": "label",
-        "per_device_batch_size": 4,
-        "num_train_epochs": 2,
-        "batch_size": 4,
     }
 
     automl.fit(
@@ -128,7 +121,7 @@ def test_ag_mx_textpredictor():
         y_val=valid_dataset["label"],
         eval_method="holdout",
         auto_augment=False,
-        estimator_list=["agtextpredictor"],
+        estimator_list=["multimodal"],
         **automl_settings
     )
 
@@ -136,4 +129,8 @@ def test_ag_mx_textpredictor():
     score = automl.model.estimator.evaluate(test_dataset)
     print(f"Inference on test set complete, {metric}: {score}")
     del automl
-    gc.collect()
\ No newline at end of file
+    gc.collect()
+
+
+if __name__ == "__main__":
+    test_multimodalestimator()
\ No newline at end of file

From c40af7d3db28f7862d945800b220c1dd347805ad Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Thu, 24 Mar 2022 17:35:41 -0400
Subject: [PATCH 21/50] Rename to MultiModalEstimator, address PR review

---
 flaml/ml.py                                               | 3 +++
 flaml/model.py                                            | 8 ++++----
 flaml/nlp/utils.py                                        | 6 ++----
 ...est_agtextpredictor.py => test_multimodalestimator.py} | 6 +-----
 4 files changed, 10 insertions(+), 13 deletions(-)
 rename test/nlp/{test_agtextpredictor.py => test_multimodalestimator.py} (98%)

diff --git a/flaml/ml.py b/flaml/ml.py
index 246dd46b0e..f8ccb2f915 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -568,6 +568,9 @@ def compute_estimator(
 
     if isinstance(estimator, TransformersEstimator):
         fit_kwargs["metric"] = eval_metric
+    
+    elif isinstance(estimator, MultiModalEstimator):
+        fit_kwargs["metric"] = eval_metric
 
     if "holdout" == eval_method:
         val_loss, metric_for_logging, train_time, pred_time = get_val_loss(
diff --git a/flaml/model.py b/flaml/model.py
index c33a9baf18..65e041dc74 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2076,6 +2076,7 @@ def _init_ag_args(self, automl_fit_kwargs: dict = None):
     def _set_seed(self, seed):
         import random
         import mxnet as mx
+        # NOTE: if support pytorch backend, uncomment below
         # import torch as th
         # th.manual_seed(seed)
         mx.random.seed(seed)
@@ -2096,23 +2097,22 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"]
         for key, value in self.params.items():
             # NOTE: FLAML uses np.float64 but AG uses float, need to transform
-            if isinstance(value, np.float64):
+            if key == "n_jobs": 
+                continue
+            elif isinstance(value, np.float64):
                 search_space[key] = value.item()
             else:
                 search_space[key] = value
-
         PROBLEM_TYPE_MAPPING = {"binary": "binary",
                                 "multi": "multiclass",
                                 "regression": "regression"
         }
-        # train the model
         start_time = time.time()
         self._model = TextPredictor(path=self.ag_args.output_dir,
                                     label="label",
                                     problem_type=PROBLEM_TYPE_MAPPING[self._task],
                                     eval_metric=kwargs["metric"],
                                     backend=self.ag_args.backend)
-
         train_data = TransformersEstimator._join(X_train, y_train)
         self._model.fit(train_data=train_data,
                         hyperparameters=hyperparameters,
diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index 9e8fc4216d..ce18071eef 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -681,15 +681,13 @@ def get_presets(self):
                         'searcher': 'random'}}
         Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html
         Return:
-            hyperparameters: a Dict of the hyperparameter settings.
+            hyperparameters: a Dict of the preset hyperparameter settings.
         """
         from autogluon.text.text_prediction.legacy_presets import ag_text_presets
 
         base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}'
         hyperparameters = ag_text_presets.create(base_key)
-        search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"]
-        # TODO: set anything that would like to be set via ag_args here
-
+        # NOTE: set anything else that would like to be set via ag_args here
         return hyperparameters
 
 
diff --git a/test/nlp/test_agtextpredictor.py b/test/nlp/test_multimodalestimator.py
similarity index 98%
rename from test/nlp/test_agtextpredictor.py
rename to test/nlp/test_multimodalestimator.py
index c8fe0bc66a..0ba8e5ec10 100644
--- a/test/nlp/test_agtextpredictor.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -106,7 +106,7 @@ def test_multimodalestimator():
         "task": "classification",
         "metric": "accuracy",
     }
-    # TODO: modify and double check
+
     automl_settings["ag_args"] = {
         "output_dir": "test/ag/output/",
         "backend": "mxnet",
@@ -130,7 +130,3 @@ def test_multimodalestimator():
     print(f"Inference on test set complete, {metric}: {score}")
     del automl
     gc.collect()
-
-
-if __name__ == "__main__":
-    test_multimodalestimator()
\ No newline at end of file

From d0b3b11b7a9bef80751b3f4591a689abf380c222 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Thu, 24 Mar 2022 17:46:14 -0400
Subject: [PATCH 22/50] remove comment

---
 flaml/automl.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index 63da070c91..1571ece494 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -933,7 +933,6 @@ def _validate_data(
             )
 
         if issparse(X_train_all):
-            # leave the preprocessing to the mm_estimator
             self._transformer = self._label_transformer = False
             self._X_train_all, self._y_train_all = X, y
         else:

From 30e9f60cfa487fb2aac2de54b69d175c65c4b858 Mon Sep 17 00:00:00 2001
From: Qiaochu Song <qcsong0818@gmail.com>
Date: Fri, 25 Mar 2022 11:06:36 -0400
Subject: [PATCH 23/50] Update data.py

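Bug fix: raise the unique-value ratio threshold used to distinguish
str columns from categorical columns from 0.1 to 0.9.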
---
 flaml/data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flaml/data.py b/flaml/data.py
index 30968b6012..eb27e5bf74 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -286,7 +286,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
                     if X[column].nunique() == 1:
                         X.drop(columns=column, inplace=True)
                         drop = True
-                    elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.1):
+                    elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.9):
                     # NOTE: here a threshold is applied for distinguishing str vs. cat 
                     # if no threshold wanted = requires every non-nan str entry to be different
                     # delete the line above and uncomment below

From d15dd60590a68a2e75cfa4adef8c7ac0be0be5dd Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Fri, 25 Mar 2022 11:10:40 -0400
Subject: [PATCH 24/50] Fix str-vs-cat threshold (0.1 -> 0.9) and comments

---
 flaml/data.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/flaml/data.py b/flaml/data.py
index 30968b6012..144a94916b 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -286,12 +286,12 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
                     if X[column].nunique() == 1:
                         X.drop(columns=column, inplace=True)
                         drop = True
-                    elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.1):
+                    elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.9):
                     # NOTE: here a threshold is applied for distinguishing str vs. cat 
-                    # if no threshold wanted = requires every non-nan str entry to be different
+                    # if no threshold wanted => requires every non-nan str entry to be different
                     # delete the line above and uncomment below
                     # elif X[column].nunique(dropna=True) == n - X[column].isnull().sum():
-                        # NOTE: here detects str fields, fillna with ""
+                        # NOTE: here detects str fields and do fillna with ""
                         X[column] = X[column].fillna("")
                         str_columns.append(column)
                     elif X[column].dtype.name == "category":

From 301eb16e0546b7d82bf755b70c8ec79869274537 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Fri, 25 Mar 2022 12:03:23 -0400
Subject: [PATCH 25/50] Remove unused ag_text_presets import from AGArgs

---
 flaml/nlp/utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index ce18071eef..65277a7082 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -641,8 +641,6 @@ class AGArgs:
         text_backbone (str, optional, defaults to "electra_base"): the text backbone model.
         multimodal_fusion_strategy (str, optional, defaults to "fuse_late"): the fuse strategy.
     """
-    from autogluon.text.text_prediction.legacy_presets import ag_text_presets
-
     output_dir: str = field(
         default="data/mm/output/", metadata={"help": "data dir", "required": True}
     )

From c59a3b27bf04eb11cd7e52c24f111a24d0a08109 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Fri, 25 Mar 2022 12:03:23 -0400
Subject: [PATCH 26/50] Remove unused ag_text_presets import from AGArgs

---
 flaml/nlp/utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index ce18071eef..65277a7082 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -641,8 +641,6 @@ class AGArgs:
         text_backbone (str, optional, defaults to "electra_base"): the text backbone model.
         multimodal_fusion_strategy (str, optional, defaults to "fuse_late"): the fuse strategy.
     """
-    from autogluon.text.text_prediction.legacy_presets import ag_text_presets
-
     output_dir: str = field(
         default="data/mm/output/", metadata={"help": "data dir", "required": True}
     )

From ea515d2ffbbede41ac6768a962cace2877ec364e Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Mon, 28 Mar 2022 15:25:54 -0400
Subject: [PATCH 27/50] remove task mapping for AG

---
 flaml/model.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/flaml/model.py b/flaml/model.py
index 3044585a94..d93a6fc6ee 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2194,14 +2194,10 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
                 search_space[key] = value.item()
             else:
                 search_space[key] = value
-        PROBLEM_TYPE_MAPPING = {"binary": "binary",
-                                "multi": "multiclass",
-                                "regression": "regression"
-        }
         start_time = time.time()
         self._model = TextPredictor(path=self.ag_args.output_dir,
                                     label="label",
-                                    problem_type=PROBLEM_TYPE_MAPPING[self._task],
+                                    problem_type=self._task,
                                     eval_metric=kwargs["metric"],
                                     backend=self.ag_args.backend)
         train_data = TransformersEstimator._join(X_train, y_train)

From 6cc2f9ef89779f451634aa4a5a9916f6f86bd41a Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Wed, 13 Apr 2022 19:48:22 -0400
Subject: [PATCH 28/50] use 0.5 threshold for text/cat inference

---
 flaml/automl.py                      |   8 +++
 flaml/data.py                        |   9 ++-
 flaml/model.py                       | 102 ++++++++++++---------------
 flaml/nlp/utils.py                   |  47 ++++--------
 test/nlp/test_multimodalestimator.py |  40 ++++-------
 5 files changed, 84 insertions(+), 122 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index bf3d3ce14f..53d52541d1 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -2280,6 +2280,14 @@ def is_to_reverse_metric(metric, task):
             )
         logger.info("List of ML learners in AutoML Run: {}".format(estimator_list))
         self.estimator_list = estimator_list
+        if self._transformer.text_columns:
+            if len(self._transformer.text_columns) == len(X_train.columns):
+                assert _is_nlp_task(self._state.task) == True
+            else:
+                self.estimator_list = ["multimodal"]
+                logger.warning("columns type of {} are set to text".format(self._transformer.text_columns))
+                logger.info("numerical columns {}".format(self._transformer._num_columns))
+                logger.info("categorical columns {}".format(self._transformer._cat_columns))
         self._state.time_budget = time_budget if time_budget > 0 else 1e10
         self._active_estimators = estimator_list.copy()
         self._ensemble = ensemble
diff --git a/flaml/data.py b/flaml/data.py
index 26cc8ae0a1..4ab80fe600 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -245,6 +245,9 @@ def concat(X1, X2):
 
 class DataTransformer:
     """Transform input training data."""
+    @property
+    def text_columns(self):
+        return self._str_columns
 
     def fit_transform(self, X: Union[DataFrame, np.array], y, task):
         """Fit transformer and process the input training data according to the task type.
@@ -286,7 +289,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
                     if X[column].nunique() == 1:
                         X.drop(columns=column, inplace=True)
                         drop = True
-                    elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.9):
+                    elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.5):
                     # NOTE: here a threshold is applied for distinguishing str vs. cat 
                     # if no threshold wanted => requires every non-nan str entry to be different
                     # delete the line above and uncomment below
@@ -364,7 +367,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
                     ]
                 )
                 X[num_columns] = self.transformer.fit_transform(X_num)
-            self.str_columns, self._cat_columns, self._num_columns, self._datetime_columns = (
+            self._str_columns, self._cat_columns, self._num_columns, self._datetime_columns = (
                 str_columns,
                 cat_columns,
                 num_columns,
@@ -407,7 +410,7 @@ def transform(self, X: Union[DataFrame, np.array]):
                 X[self._str_columns] = X[self._str_columns].astype("string")
         elif isinstance(X, DataFrame):
             str_columns, cat_columns, num_columns, datetime_columns = (
-                self.str_columns,
+                self._str_columns,
                 self._cat_columns,
                 self._num_columns,
                 self._datetime_columns,
diff --git a/flaml/model.py b/flaml/model.py
index d93a6fc6ee..e4c01892f9 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -132,6 +132,13 @@ def estimator(self):
     def _preprocess(self, X):
         return X
 
+    @staticmethod
+    def _join(X_train, y_train):
+        y_train = DataFrame(y_train, index=X_train.index)
+        y_train.columns = ["label"]
+        train_df = X_train.join(y_train)
+        return train_df
+
     def _fit(self, X_train, y_train, **kwargs):
 
         current_time = time.time()
@@ -361,13 +368,6 @@ def __init__(self, task="seq-classification", **config):
             from transformers import TrainingArguments
         self._TrainingArguments = TrainingArguments
 
-    @staticmethod
-    def _join(X_train, y_train):
-        y_train = DataFrame(y_train, index=X_train.index)
-        y_train.columns = ["label"]
-        train_df = X_train.join(y_train)
-        return train_df
-
     @classmethod
     def search_space(cls, data_size, task, **params):
         search_space_dict = {
@@ -593,7 +593,7 @@ def on_epoch_end(self, args, state, control, **callback_kwargs):
             )
 
         train_dataset = Dataset.from_pandas(
-            TransformersEstimator._join(self._X_train, self._y_train)
+            BaseEstimator._join(self._X_train, self._y_train)
         )
 
         if X_val is not None:
@@ -603,7 +603,7 @@ def on_epoch_end(self, args, state, control, **callback_kwargs):
             else:
                 self._X_val, self._y_val = self._preprocess(X=X_val, y=y_val, **kwargs)
             eval_dataset = Dataset.from_pandas(
-                TransformersEstimator._join(self._X_val, self._y_val)
+                BaseEstimator._join(self._X_val, self._y_val)
             )
         else:
             eval_dataset = None
@@ -831,7 +831,7 @@ def score(self, X_val: DataFrame, y_val: Series, **kwargs):
             self._X_val, self._y_val = self._preprocess(X=X_val, y=y_val)
 
         eval_dataset = Dataset.from_pandas(
-            TransformersEstimator._join(self._X_val, self._y_val)
+            BaseEstimator._join(self._X_val, self._y_val)
         )
 
         new_trainer, training_args = self._init_model_for_predict()
@@ -2103,6 +2103,12 @@ class MultiModalEstimator(BaseEstimator):
     """
     The class for tuning AutoGluon TextPredictor
     """
+    def __init__(self, task="binary", **config):
+        super().__init__(task, **config)
+        import uuid
+
+        self.trial_id = str(uuid.uuid1().hex)[:8]
+
     @classmethod
     def search_space(cls, **params):
         """
@@ -2128,26 +2134,6 @@ def search_space(cls, **params):
                 "domain": tune.choice([0.1, 0.2]),
                 "init_value": 0.1, 
             },
-            "optimization.layerwise_lr_decay": {
-                "domain": tune.choice([0.8, 0.9]),
-                "init_value": 0.8,
-            },
-            "optimization.nbest": {
-                "domain": tune.choice([2, 3, 4,]),
-                "init_value": 3,
-            },
-            "optimization.num_train_epochs": {
-                "domain": tune.choice([5, 10, 15,]),
-                "init_value": 10,
-            },
-            "optimization.per_device_batch_size": {
-                "domain": tune.choice([2, 4, 8,]),
-                "init_value": 10,
-            },
-            "optimization.batch_size": {
-                "domain": tune.choice([32, 64, 128,]),
-                "init_value": 128,
-            },
         }
         return search_space_dict
 
@@ -2164,63 +2150,65 @@ def _init_ag_args(self, automl_fit_kwargs: dict = None):
             setattr(ag_args, key, val)
         self.ag_args = ag_args
 
-    def _set_seed(self, seed):
-        import random
-        import mxnet as mx
-        # NOTE: if support pytorch backend, uncomment below
-        # import torch as th
-        # th.manual_seed(seed)
-        mx.random.seed(seed)
-        np.random.seed(seed)
-        random.seed(seed)
-
     def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         from autogluon.text import TextPredictor
 
         self._kwargs = kwargs
         self._init_ag_args(kwargs)
         seed = self._kwargs.get("seed", 123)
-        self._set_seed(seed)
 
         assert (self.ag_args.backend == "mxnet"), "the pytorch automm model is not supported. "
         # get & set the hyperparameters, update with self.params
-        hyperparameters = self.ag_args.get_presets()
+        hyperparameters = self.ag_args.hyperparameters
         search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"]
         for key, value in self.params.items():
             # NOTE: FLAML uses np.float64 but AG uses float, need to transform
             if key == "n_jobs": 
                 continue
-            elif isinstance(value, np.float64):
-                search_space[key] = value.item()
             else:
-                search_space[key] = value
+                search_space[key] = value.item() if isinstance(value, np.float64) else value
         start_time = time.time()
-        self._model = TextPredictor(path=self.ag_args.output_dir,
-                                    label="label",
-                                    problem_type=self._task,
-                                    eval_metric=kwargs["metric"],
-                                    backend=self.ag_args.backend)
-        train_data = TransformersEstimator._join(X_train, y_train)
-        self._model.fit(train_data=train_data,
-                        hyperparameters=hyperparameters,
-                        time_limit=budget,
-                        seed=seed)
+        self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id)
+        model = TextPredictor(path=self.model_path,
+                              label="label",
+                              problem_type=self._task,
+                              eval_metric=kwargs["metric"],
+                              backend=self.ag_args.backend)
+        train_data = BaseEstimator._join(X_train, y_train)
+        model.fit(train_data=train_data,
+                  hyperparameters=hyperparameters,
+                  num_gpus=kwargs.get("gpu_per_trial", None),
+                  time_limit=budget,
+                  seed=seed)
 
         training_time = time.time() - start_time
         return training_time
 
     def predict(self, X):
-        output = self._model.predict(X, as_pandas=False)
+        from autogluon.text import TextPredictor
+
+        model = TextPredictor.load(path=self.model_path, backend=self.ag_args.backend)
+        output = model.predict(X, as_pandas=False)
         return output
 
     def predict_proba(self, X):
+        from autogluon.text import TextPredictor
+
         # only works for classification tasks
         assert (
             self._task in CLASSIFICATION
         ), "predict_proba() only for classification tasks."
-        output = self._model.predict_proba(X, as_pandas=False)
+        model = TextPredictor.load(path=self.model_path, backend=self.ag_args.backend)
+        output = model.predict_proba(X, as_pandas=False)
         return output
 
+    def score(self, X_val: DataFrame, y_val: Series, **kwargs):
+        from autogluon.text import TextPredictor
+
+        model = TextPredictor.load(path=self.model_path, backend=self.ag_args.backend)
+        val_data = BaseEstimator._join(X_val, y_val)
+        return model.evaluate(val_data)
+        
 
 class suppress_stdout_stderr(object):
     def __init__(self):
diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index 0565079b7f..a4cf0779ff 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -650,47 +650,25 @@ class AGArgs:
     backend: str = field(default="mxnet", metadata={"help": "the backend of the multimodal model"})
     text_backbone: str = field(default="electra_base", metadata={"help": "text backbone model"})
     multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"})
-    # TODO: determine whether to tune these HPs 
-    # per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"})
-    # num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"})
-    # batch_size: int = field(default=128,  metadata={"help": "batch size"})
-    
+    per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"})
+    num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"})
+    batch_size: int = field(default=128,  metadata={"help": "batch size"})
+    hyperparameters: dict = field(init=False)
 
-    def get_presets(self):
+    def __post_init__(self):
         """
-        Get the preset using the AGArgs.
-        {'models': {'MultimodalTextModel': {'backend': 'gluonnlp_v0',
-                                    'search_space': {'model.backbone.name': 'google_electra_small',
-                                                    'model.network.agg_net.agg_type': 'concat',
-                                                    'model.network.agg_net.mid_units': 128,  # [in HPO example]
-                                                    'model.network.aggregate_categorical': True,
-                                                    'model.use_avg_nbest': True,
-                                                    'optimization.batch_size': 128,
-                                                    'optimization.layerwise_lr_decay': 0.8,
-                                                    'optimization.lr': Categorical[0.0001],
-                                                    'optimization.nbest': 3,
-                                                    'optimization.num_train_epochs': 10,
-                                                    'optimization.per_device_batch_size': 8,
-                                                    'optimization.wd': 0.0001,
-                                                    'optimization.warmup_portion': 0.1,  # [in HPO example]
-                                                    'preprocessing.categorical.convert_to_text': False,
-                                                    'preprocessing.numerical.convert_to_text': False}}},
-        'tune_kwargs': {'num_trials': 1,
-                        'scheduler_options': None,
-                        'search_options': None,
-                        'search_strategy': 'local',
-                        'searcher': 'random'}}
+        Get the preset using the AGArgs. Save as self.hyperparameters.
         Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html
-        Return:
-            hyperparameters: a Dict of the preset hyperparameter settings.
         """
         from autogluon.text.text_prediction.legacy_presets import ag_text_presets
 
         base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}'
-        hyperparameters = ag_text_presets.create(base_key)
-        # NOTE: set anything else that would like to be set via ag_args here
-        return hyperparameters
-
+        self.hyperparameters = ag_text_presets.create(base_key)
+        # NOTE: set batch & epoch
+        search_space = self.hyperparameters["models"]["MultimodalTextModel"]["search_space"]
+        search_space["optimization.per_device_batch_size"] = self.per_device_batch_size
+        search_space["optimization.batch_size"] = self.batch_size
+        search_space["optimization.num_train_epochs"] = self.num_train_epochs
 
     @staticmethod
     def load_args():
@@ -698,7 +676,6 @@ def load_args():
 
         arg_parser = argparse.ArgumentParser()
         for each_field in fields(AGArgs):
-            print(each_field)
             arg_parser.add_argument(
                 "--" + each_field.name,
                 type=each_field.type,
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 0ba8e5ec10..004cd6b9bc 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -1,11 +1,11 @@
 from flaml import AutoML
 import pandas as pd
-import requests
 import gc
 import numpy as np
 import os
 import sys
 import platform
+import pickle
 from sklearn.model_selection import train_test_split
 os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
 
@@ -70,25 +70,6 @@ def test_multimodalestimator():
     }
     train_dataset = pd.DataFrame(train_data)
 
-    test_data = {
-            "sentence1": [
-                "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .",
-                "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .",
-                "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .",
-                "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .",
-            ],
-            "sentence2": [
-                "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .",
-                "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .",
-                "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .",
-                "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .",
-            ],
-            "numerical1": [3, 4, 5, 6],
-            "categorical1": ["b", "a", "a", "b"],
-            "label": [0, 1, 1, 2],
-    }
-    test_dataset = pd.DataFrame(test_data)
-
     # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
     holdout_frac = default_holdout_frac(len(train_dataset), False)
 
@@ -102,15 +83,15 @@ def test_multimodalestimator():
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 50,
+        "time_budget": 10,
         "task": "classification",
         "metric": "accuracy",
     }
 
     automl_settings["ag_args"] = {
-        "output_dir": "test/ag/output/",
+        "output_dir": "test/ag_output/",
         "backend": "mxnet",
-        "text_backbone": "electra_base",
+        "text_backbone": "electra_small",
         "multimodal_fusion_strategy": "fuse_late",
     }
 
@@ -124,9 +105,14 @@ def test_multimodalestimator():
         estimator_list=["multimodal"],
         **automl_settings
     )
-
-    print("Try to run inference on test set")
-    score = automl.model.estimator.evaluate(test_dataset)
-    print(f"Inference on test set complete, {metric}: {score}")
+    automl.pickle("automl.pkl")
+    with open("automl.pkl", "rb") as f:
+        automl = pickle.load(f)
+    print("Try to run inference on validation set")
+    score = automl.score(valid_dataset[feature_columns], valid_dataset["label"])
+    print(f"Inference on validation set complete, {metric}: {score}")
     del automl
     gc.collect()
+
+if __name__ == "__main__":
+    test_multimodalestimator()
\ No newline at end of file

From 4cc2b4e783a2b5af3c24c67cf712df76c7ba60d5 Mon Sep 17 00:00:00 2001
From: Varia <Varia@variadembp.mynetworksettings.com>
Date: Thu, 14 Apr 2022 11:48:55 -0400
Subject: [PATCH 29/50] add MM_TASKS; no preprocess on X; pass val_data for
 early stopping

---
 flaml/automl.py                      | 16 +++++-----
 flaml/data.py                        | 48 +++++++++++++++-------------
 flaml/ml.py                          |  2 ++
 flaml/model.py                       |  7 ++--
 test/nlp/test_multimodalestimator.py | 26 ++-------------
 5 files changed, 44 insertions(+), 55 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index 53d52541d1..d8b4f35a21 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -46,6 +46,7 @@
     REGRESSION,
     _is_nlp_task,
     NLG_TASKS,
+    MM_TASKS,
 )
 from . import tune
 from .training_log import training_log_reader, training_log_writer
@@ -1480,6 +1481,10 @@ def _decide_split_type(self, split_type):
             self._state.task = get_classification_objective(
                 len(np.unique(self._y_train_all))
             )
+        elif self._state.task == "mm-classification":
+             self._state.task = "mm-" + get_classification_objective(
+                len(np.unique(self._y_train_all))
+            )
         if not isinstance(split_type, str):
             assert hasattr(split_type, "split") and hasattr(
                 split_type, "get_n_splits"
@@ -2192,6 +2197,9 @@ def is_to_reverse_metric(metric, task):
                 estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"]
             elif _is_nlp_task(self._state.task):
                 estimator_list = ["transformer"]
+            # NOTE: if multimodal task, use multimodal estimator
+            elif self._state.task in MM_TASKS:
+                estimator_list=["multimodal"]
             else:
                 try:
                     import catboost
@@ -2280,14 +2288,6 @@ def is_to_reverse_metric(metric, task):
             )
         logger.info("List of ML learners in AutoML Run: {}".format(estimator_list))
         self.estimator_list = estimator_list
-        if self._transformer.text_columns:
-            if len(self._transformer.text_columns) == len(X_train.columns):
-                assert _is_nlp_task(self._state.task) == True
-            else:
-                self.estimator_list = ["multimodal"]
-                logger.warning("columns type of {} are set to text".format(self._transformer.text_columns))
-                logger.info("numerical columns {}".format(self._transformer._num_columns))
-                logger.info("categorical columns {}".format(self._transformer._cat_columns))
         self._state.time_budget = time_budget if time_budget > 0 else 1e10
         self._active_estimators = estimator_list.copy()
         self._ensemble = ensemble
diff --git a/flaml/data.py b/flaml/data.py
index 4ab80fe600..e036975460 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -23,9 +23,12 @@
     SEQCLASSIFICATION,
     MULTICHOICECLASSIFICATION,
     TOKENCLASSIFICATION,
+    "mm-binary",
+    "mm-multiclass",
+    "mm-classification",
 )
 SEQREGRESSION = "seq-regression"
-REGRESSION = ("regression", SEQREGRESSION)
+REGRESSION = ("regression", SEQREGRESSION, "mm-regression")
 TS_FORECASTREGRESSION = (
     "forecast",
     "ts_forecast",
@@ -46,6 +49,11 @@
     MULTICHOICECLASSIFICATION,
     TOKENCLASSIFICATION,
 )
+MM_TASKS = (
+    "mm-classification", 
+    "mm-regression", 
+    "mm-binary", 
+    "mm-multiclass",)
 
 
 def _is_nlp_task(task):
@@ -245,10 +253,6 @@ def concat(X1, X2):
 
 class DataTransformer:
     """Transform input training data."""
-    @property
-    def text_columns(self):
-        return self._str_columns
-
     def fit_transform(self, X: Union[DataFrame, np.array], y, task):
         """Fit transformer and process the input training data according to the task type.
 
@@ -272,11 +276,14 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
             if len(str_columns) > 0:
                 X[str_columns] = X[str_columns].astype("string")
             self._str_columns = str_columns
+        # NOTE: if multimodal task, no preprocessing on X
+        elif task in MM_TASKS:
+            for column in X.columns:
+                X[column].astype("object")
         elif isinstance(X, DataFrame):
             X = X.copy()
             n = X.shape[0]
-            # NOTE: add str_columns here
-            str_columns, cat_columns, num_columns, datetime_columns = [], [], [], []
+            cat_columns, num_columns, datetime_columns = [], [], []
             drop = False
             if task in TS_FORECAST:
                 X = X.rename(columns={X.columns[0]: TS_TIMESTAMP_COL})
@@ -286,17 +293,13 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
             for column in X.columns:
                 # sklearn\utils\validation.py needs int/float values
                 if X[column].dtype.name in ("object", "category"):
-                    if X[column].nunique() == 1:
+                    if (
+                        X[column].nunique() == 1
+                        or X[column].nunique(dropna=True)
+                        == n - X[column].isnull().sum()
+                    ):
                         X.drop(columns=column, inplace=True)
                         drop = True
-                    elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.5):
-                    # NOTE: here a threshold is applied for distinguishing str vs. cat 
-                    # if no threshold wanted => requires every non-nan str entry to be different
-                    # delete the line above and uncomment below
-                    # elif X[column].nunique(dropna=True) == n - X[column].isnull().sum():
-                        # NOTE: here detects str fields and do fillna with ""
-                        X[column] = X[column].fillna("")
-                        str_columns.append(column)
                     elif X[column].dtype.name == "category":
                         current_categories = X[column].cat.categories
                         if "__NAN__" not in current_categories:
@@ -338,7 +341,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
                         del tmp_dt
                     X[column] = X[column].fillna(np.nan)
                     num_columns.append(column)
-            X = X[str_columns + cat_columns + num_columns]
+            X = X[cat_columns + num_columns]
             if task in TS_FORECAST:
                 X.insert(0, TS_TIMESTAMP_COL, ds_col)
             if cat_columns:
@@ -367,8 +370,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task):
                     ]
                 )
                 X[num_columns] = self.transformer.fit_transform(X_num)
-            self._str_columns, self._cat_columns, self._num_columns, self._datetime_columns = (
-                str_columns,
+            self._cat_columns, self._num_columns, self._datetime_columns = (
                 cat_columns,
                 num_columns,
                 datetime_columns,
@@ -408,9 +410,11 @@ def transform(self, X: Union[DataFrame, np.array]):
             # ids (input ids, token type id, attention mask, etc.)
             if len(self._str_columns) > 0:
                 X[self._str_columns] = X[self._str_columns].astype("string")
+        elif self._task in MM_TASKS:
+            for column in X.columns:
+                X[column].astype("category")
         elif isinstance(X, DataFrame):
-            str_columns, cat_columns, num_columns, datetime_columns = (
-                self._str_columns,
+            cat_columns, num_columns, datetime_columns = (
                 self._cat_columns,
                 self._num_columns,
                 self._datetime_columns,
@@ -436,7 +440,7 @@ def transform(self, X: Union[DataFrame, np.array]):
                         X[new_col_name] = new_col_value
                 X[column] = X[column].map(datetime.toordinal)
                 del tmp_dt
-            X = X[str_columns + cat_columns + num_columns].copy()
+            X = X[cat_columns + num_columns].copy()
             if self._task in TS_FORECAST:
                 X.insert(0, TS_TIMESTAMP_COL, ds_col)
             for column in cat_columns:
diff --git a/flaml/ml.py b/flaml/ml.py
index 586f06f2d0..436b41caf2 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -580,6 +580,8 @@ def compute_estimator(
 
     elif isinstance(estimator, MultiModalEstimator):
         fit_kwargs["metric"] = eval_metric
+        fit_kwargs["X_val"] = X_val
+        fit_kwargs["y_val"] = y_val
 
     if "holdout" == eval_method:
         val_loss, metric_for_logging, train_time, pred_time = get_val_loss(
diff --git a/flaml/model.py b/flaml/model.py
index e4c01892f9..a0a834092c 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -31,6 +31,7 @@
     SUMMARIZATION,
     NLG_TASKS,
     MULTICHOICECLASSIFICATION,
+    MM_TASKS
 )
 
 try:
@@ -2116,7 +2117,6 @@ def search_space(cls, **params):
         reference:
         https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
         """
-        # TODO: expand the search space
         search_space_dict = {
             "model.network.agg_net.mid_units": {
                 "domain": tune.choice(list(range(32, 129))),
@@ -2169,13 +2169,16 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
                 search_space[key] = value.item() if isinstance(value, np.float64) else value
         start_time = time.time()
         self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id)
+        assert self._task in MM_TASKS, f"The task is not multimodal, but {self._task}. "
         model = TextPredictor(path=self.model_path,
                               label="label",
-                              problem_type=self._task,
+                              problem_type=self._task[3:],
                               eval_metric=kwargs["metric"],
                               backend=self.ag_args.backend)
         train_data = BaseEstimator._join(X_train, y_train)
+        tuning_data = BaseEstimator._join(kwargs.get("X_val"), kwargs.get("y_val"))
         model.fit(train_data=train_data,
+                  tuning_data=tuning_data,
                   hyperparameters=hyperparameters,
                   num_gpus=kwargs.get("gpu_per_trial", None),
                   time_limit=budget,
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 004cd6b9bc..dfc748d992 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -9,21 +9,6 @@
 from sklearn.model_selection import train_test_split
 os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1"
 
-def default_holdout_frac(num_train_rows, hyperparameter_tune=False):
-    """
-    Returns default holdout_frac used in fit().
-    Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples.
-    Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243
-    """
-    if num_train_rows < 5000:
-        holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows))
-    else:
-        holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows))
-
-    if hyperparameter_tune:
-        holdout_frac = min(0.2, holdout_frac * 2)  # to allocate more validation data for HPO to avoid overfitting
-
-    return holdout_frac
 
 def test_multimodalestimator():
     if sys.version < "3.7":
@@ -69,12 +54,8 @@ def test_multimodalestimator():
         "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2, 0, 1],
     }
     train_dataset = pd.DataFrame(train_data)
-
-    # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR
-    holdout_frac = default_holdout_frac(len(train_dataset), False)
-
-    _, valid_dataset = train_test_split(train_dataset,
-                                    test_size=holdout_frac,
+    train_dataset, valid_dataset = train_test_split(train_dataset,
+                                    test_size=0.2,
                                     random_state=np.random.RandomState(seed))
     
     feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
@@ -84,7 +65,7 @@ def test_multimodalestimator():
         "gpu_per_trial": 0,
         "max_iter": 2,
         "time_budget": 10,
-        "task": "classification",
+        "task": "mm-classification",
         "metric": "accuracy",
     }
 
@@ -102,7 +83,6 @@ def test_multimodalestimator():
         y_val=valid_dataset["label"],
         eval_method="holdout",
         auto_augment=False,
-        estimator_list=["multimodal"],
         **automl_settings
     )
     automl.pickle("automl.pkl")

From 4fa136d8ab300142caa73093399b4e37722bce5c Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 14 Apr 2022 14:14:44 -0400
Subject: [PATCH 30/50] adjust testing data and raise budget

---
 test/nlp/test_multimodalestimator.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index dfc748d992..9175669e0e 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -32,8 +32,6 @@ def test_multimodalestimator():
             "The DVD-CCA then appealed to the state Supreme Court .",
             "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
             "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
-            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
         ],
         "sentence2": [
             'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
@@ -46,12 +44,10 @@ def test_multimodalestimator():
             "The DVD CCA appealed that decision to the U.S. Supreme Court .",
             "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
             "The DVD-CCA then appealed to the state Supreme Court .",  
-            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
-            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
         ],
-        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
-        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a", "a", "b", "a", "a"],
-        "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2, 0, 1],
+        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a", "a", "b"],
+        "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2],
     }
     train_dataset = pd.DataFrame(train_data)
     train_dataset, valid_dataset = train_test_split(train_dataset,
@@ -64,7 +60,7 @@ def test_multimodalestimator():
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 10,
+        "time_budget": 15,
         "task": "mm-classification",
         "metric": "accuracy",
     }
@@ -93,6 +89,3 @@ def test_multimodalestimator():
     print(f"Inference on validation set complete, {metric}: {score}")
     del automl
     gc.collect()
-
-if __name__ == "__main__":
-    test_multimodalestimator()
\ No newline at end of file

From 25c1baf285fec3f988a7b54491c7e1806f26fb8f Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 14 Apr 2022 14:59:37 -0400
Subject: [PATCH 31/50] shrink test toy data and budget

---
 test/nlp/test_multimodalestimator.py | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 9175669e0e..4b70677d07 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -1,6 +1,5 @@
 from flaml import AutoML
 import pandas as pd
-import gc
 import numpy as np
 import os
 import sys
@@ -27,11 +26,6 @@ def test_multimodalestimator():
             "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
             "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
             "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
-            "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
-            "The DVD-CCA then appealed to the state Supreme Court .",
-            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
-            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
         ],
         "sentence2": [
             'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
@@ -39,15 +33,10 @@ def test_multimodalestimator():
             "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
             "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
             "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
-            "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .",
-            "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .",
-            "The DVD CCA appealed that decision to the U.S. Supreme Court .",
-            "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .",
-            "The DVD-CCA then appealed to the state Supreme Court .",  
         ],
-        "numerical1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
-        "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a", "a", "b"],
-        "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2],
+        "numerical1": [1, 2, 3, 4, 5],
+        "categorical1": ["a", "b", "a", "b", "a", ],
+        "label": [1, 0, 1, 0, 1,],
     }
     train_dataset = pd.DataFrame(train_data)
     train_dataset, valid_dataset = train_test_split(train_dataset,
@@ -63,6 +52,7 @@ def test_multimodalestimator():
         "time_budget": 15,
         "task": "mm-classification",
         "metric": "accuracy",
+        "seed": seed,
     }
 
     automl_settings["ag_args"] = {
@@ -87,5 +77,3 @@ def test_multimodalestimator():
     print("Try to run inference on validation set")
     score = automl.score(valid_dataset[feature_columns], valid_dataset["label"])
     print(f"Inference on validation set complete, {metric}: {score}")
-    del automl
-    gc.collect()

From f9d3b22491c0f90dc7d26ba274d7d0911c76bd4c Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 14 Apr 2022 15:36:35 -0400
Subject: [PATCH 32/50] change to regression test

---
 test/nlp/test_multimodalestimator.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 4b70677d07..deae2e2c91 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -35,8 +35,8 @@ def test_multimodalestimator():
             "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
         ],
         "numerical1": [1, 2, 3, 4, 5],
-        "categorical1": ["a", "b", "a", "b", "a", ],
-        "label": [1, 0, 1, 0, 1,],
+        "categorical1": ["a", "b", "a", "b", "a"],
+        "label": [5, 4, 3, 2, 1],
     }
     train_dataset = pd.DataFrame(train_data)
     train_dataset, valid_dataset = train_test_split(train_dataset,
@@ -50,8 +50,8 @@ def test_multimodalestimator():
         "gpu_per_trial": 0,
         "max_iter": 2,
         "time_budget": 15,
-        "task": "mm-classification",
-        "metric": "accuracy",
+        "task": "mm-regression",
+        "metric": "r2",
         "seed": seed,
     }
 

From c1568b447ef227e2d0a9866ca77e208db536f95f Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 14 Apr 2022 16:12:15 -0400
Subject: [PATCH 33/50] add metric to kwargs for mm in train_estimator, raise
 test budget

---
 flaml/ml.py                          | 2 ++
 test/nlp/test_multimodalestimator.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/flaml/ml.py b/flaml/ml.py
index 436b41caf2..cbe7f5e0d8 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -637,6 +637,8 @@ def train_estimator(
     )
     if isinstance(estimator, TransformersEstimator):
         fit_kwargs["metric"] = eval_metric
+    elif isinstance(estimator, MultiModalEstimator):
+        fit_kwargs["metric"] = eval_metric
 
     if X_train is not None:
         train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index deae2e2c91..dc82e696cd 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -49,7 +49,7 @@ def test_multimodalestimator():
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 15,
+        "time_budget": 20,
         "task": "mm-regression",
         "metric": "r2",
         "seed": seed,

From 1e4201d49568400865e00053b3ff15056f9e916b Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Fri, 15 Apr 2022 17:38:39 -0400
Subject: [PATCH 34/50] use valid data if any for early stopping, raise test
 budget

---
 flaml/model.py                       | 9 ++++++++-
 test/nlp/test_multimodalestimator.py | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/flaml/model.py b/flaml/model.py
index 1f74762aa9..ba5aaa1609 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2181,7 +2181,14 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
                               eval_metric=kwargs["metric"],
                               backend=self.ag_args.backend)
         train_data = BaseEstimator._join(X_train, y_train)
-        tuning_data = BaseEstimator._join(kwargs.get("X_val"), kwargs.get("y_val"))
+        # use valid data for early stopping
+        X_val = kwargs.get("X_val")
+        y_val = kwargs.get("y_val")
+        if X_val and y_val:
+            tuning_data = BaseEstimator._join(X_val, y_val)
+        else:
+            tuning_data = None
+        # NOTE: if no tuning_data, model.fit() will holdout a fraction from train_data for early stopping
         model.fit(train_data=train_data,
                   tuning_data=tuning_data,
                   hyperparameters=hyperparameters,
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index dc82e696cd..7f7dab6f52 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -49,7 +49,7 @@ def test_multimodalestimator():
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 20,
+        "time_budget": 30,
         "task": "mm-regression",
         "metric": "r2",
         "seed": seed,

From 9692d4eded7bc16a3b5fecada2a056a58606aacb Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Fri, 15 Apr 2022 19:26:41 -0400
Subject: [PATCH 35/50] return to the original budget

---
 test/nlp/test_multimodalestimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 7f7dab6f52..bb6518f19d 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -49,7 +49,7 @@ def test_multimodalestimator():
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 30,
+        "time_budget": 50,
         "task": "mm-regression",
         "metric": "r2",
         "seed": seed,

From 1b2cb28e6572525036e44741da8ceb430e3b520a Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Sat, 16 Apr 2022 01:23:02 -0400
Subject: [PATCH 36/50] fix valid DF checking

---
 flaml/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flaml/model.py b/flaml/model.py
index ba5aaa1609..196f8228c2 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2184,7 +2184,7 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         # use valid data for early stopping
         X_val = kwargs.get("X_val")
         y_val = kwargs.get("y_val")
-        if X_val and y_val:
+        if X_val is not None and y_val is not None:
             tuning_data = BaseEstimator._join(X_val, y_val)
         else:
             tuning_data = None

From 05941bc059fc999012d0716f702422c4a7eb0b2b Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Mon, 18 Apr 2022 16:10:15 -0400
Subject: [PATCH 37/50] simplify isinstance in ml.py

---
 flaml/ml.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/flaml/ml.py b/flaml/ml.py
index cbe7f5e0d8..8cd2e7e974 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -573,12 +573,7 @@ def compute_estimator(
         n_jobs=n_jobs,
     )
 
-    if isinstance(estimator, TransformersEstimator):
-        fit_kwargs["metric"] = eval_metric
-        fit_kwargs["X_val"] = X_val
-        fit_kwargs["y_val"] = y_val
-
-    elif isinstance(estimator, MultiModalEstimator):
+    if isinstance(estimator, (TransformersEstimator, MultiModalEstimator)):
         fit_kwargs["metric"] = eval_metric
         fit_kwargs["X_val"] = X_val
         fit_kwargs["y_val"] = y_val

From 74f27b589adc4dd4f0f9a82e28810987b732791a Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Tue, 19 Apr 2022 12:31:55 -0400
Subject: [PATCH 38/50] reduce text column and budget

---
 test/nlp/test_multimodalestimator.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index bb6518f19d..7b81fe49e0 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -27,13 +27,13 @@ def test_multimodalestimator():
             "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
             "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
         ],
-        "sentence2": [
-            'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
-            "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
-            "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
-            "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
-            "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
-        ],
+        # "sentence2": [
+        #     'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
+        #     "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
+        #     "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
+        #     "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
+        #     "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
+        # ],
         "numerical1": [1, 2, 3, 4, 5],
         "categorical1": ["a", "b", "a", "b", "a"],
         "label": [5, 4, 3, 2, 1],
@@ -43,13 +43,13 @@ def test_multimodalestimator():
                                     test_size=0.2,
                                     random_state=np.random.RandomState(seed))
     
-    feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
-
+    # feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
+    feature_columns = ["sentence1", "numerical1", "categorical1"]
     automl = AutoML()
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 50,
+        "time_budget": 20,
         "task": "mm-regression",
         "metric": "r2",
         "seed": seed,

From c8848c76194b5b10616aaab6d5f019c7730981c5 Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Tue, 19 Apr 2022 13:21:32 -0400
Subject: [PATCH 39/50] use only 4-row toy test data

---
 test/nlp/test_multimodalestimator.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 7b81fe49e0..16092256c2 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -25,25 +25,16 @@ def test_multimodalestimator():
             "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
             "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
             "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
-            "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .",
         ],
-        # "sentence2": [
-        #     'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .',
-        #     "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .",
-        #     "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .",
-        #     "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .",
-        #     "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .",
-        # ],
-        "numerical1": [1, 2, 3, 4, 5],
-        "categorical1": ["a", "b", "a", "b", "a"],
-        "label": [5, 4, 3, 2, 1],
+        "numerical1": [1, 2, 3, 4],
+        "categorical1": ["a", "b", "a", "b"],
+        "label": [5, 4, 3, 2],
     }
     train_dataset = pd.DataFrame(train_data)
     train_dataset, valid_dataset = train_test_split(train_dataset,
                                     test_size=0.2,
                                     random_state=np.random.RandomState(seed))
     
-    # feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"]
     feature_columns = ["sentence1", "numerical1", "categorical1"]
     automl = AutoML()
     automl_settings = {

From 7be2c5c6848e4f9c129c1c166d8144c22d0abcb7 Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Tue, 19 Apr 2022 14:06:40 -0400
Subject: [PATCH 40/50] test 10s budget

---
 test/nlp/test_multimodalestimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 16092256c2..6c08ad58b1 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -40,7 +40,7 @@ def test_multimodalestimator():
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 20,
+        "time_budget": 10,
         "task": "mm-regression",
         "metric": "r2",
         "seed": seed,

From 1c7f7ad431750816cea506b6454d39bd95e5de19 Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Tue, 19 Apr 2022 15:59:06 -0400
Subject: [PATCH 41/50] minimize test toy dataset

---
 test/nlp/test_multimodalestimator.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 6c08ad58b1..d6bb197891 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -18,24 +18,26 @@ def test_multimodalestimator():
         return
 
     seed = 123
-    metric = "accuracy"
+    metric = "r2"
     train_data = {
         "sentence1": [
             'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
-            "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .",
+        ],
+        "numerical1": [1],
+        "label": [1],
+    }
+
+    valid_data = {
+        "sentence1": [
             "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
-            "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .",
         ],
-        "numerical1": [1, 2, 3, 4],
-        "categorical1": ["a", "b", "a", "b"],
-        "label": [5, 4, 3, 2],
+        "numerical1": [1],
+        "label": [1],
     }
     train_dataset = pd.DataFrame(train_data)
-    train_dataset, valid_dataset = train_test_split(train_dataset,
-                                    test_size=0.2,
-                                    random_state=np.random.RandomState(seed))
+    valid_dataset = pd.DataFrame(valid_data)
     
-    feature_columns = ["sentence1", "numerical1", "categorical1"]
+    feature_columns = ["sentence1", "numerical1"]
     automl = AutoML()
     automl_settings = {
         "gpu_per_trial": 0,

From be60fa6c22bb0f68e8a6b720d1a15401ceb500ed Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Tue, 19 Apr 2022 16:38:01 -0400
Subject: [PATCH 42/50] shorter test sentence

---
 test/nlp/test_multimodalestimator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index d6bb197891..4d0a864677 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -21,7 +21,7 @@ def test_multimodalestimator():
     metric = "r2"
     train_data = {
         "sentence1": [
-            'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
+            "Amrozi accused his brother of deliberately distorting his evidence.",
         ],
         "numerical1": [1],
         "label": [1],
@@ -29,7 +29,7 @@ def test_multimodalestimator():
 
     valid_data = {
         "sentence1": [
-            "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .",
+            "They had published an advertisement on the Internet on June 10.",
         ],
         "numerical1": [1],
         "label": [1],

From 3a29c5b276ddef0366f79dce923a2892372f6496 Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Wed, 20 Apr 2022 14:23:23 -0400
Subject: [PATCH 43/50] give enough test budget

---
 test/nlp/test_multimodalestimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 4d0a864677..63f7f7bc50 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -42,7 +42,7 @@ def test_multimodalestimator():
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 10,
+        "time_budget": 20,
         "task": "mm-regression",
         "metric": "r2",
         "seed": seed,

From 543b660e1afcd42cf69cf83e348951c546934ed0 Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Wed, 20 Apr 2022 14:23:23 -0400
Subject: [PATCH 44/50] give enough test budget

---
 flaml/automl.py                      | 2 ++
 flaml/data.py                        | 4 ----
 test/nlp/test_multimodalestimator.py | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index d8b4f35a21..c5818c72be 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -721,6 +721,8 @@ def score(self, X: pd.DataFrame, y: pd.Series, **kwargs):
             )
             return None
         X = self._preprocess(X)
+        if self._label_transformer:
+            y = self._label_transformer.transform(y)
         return estimator.score(X, y, **kwargs)
 
     def predict(
diff --git a/flaml/data.py b/flaml/data.py
index e036975460..0cadead32a 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -395,13 +395,9 @@ def transform(self, X: Union[DataFrame, np.array]):
 
         Args:
             X: A numpy array or a pandas dataframe of training data.
-            y: A numpy array or a pandas series of labels.
-            task: A string of the task type, e.g.,
-                'classification', 'regression', 'ts_forecast', 'rank'.
 
         Returns:
             X: Processed numpy array or pandas dataframe of training data.
-            y: Processed numpy array or pandas series of labels.
         """
         X = X.copy()
 
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 4d0a864677..63f7f7bc50 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -42,7 +42,7 @@ def test_multimodalestimator():
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 2,
-        "time_budget": 10,
+        "time_budget": 20,
         "task": "mm-regression",
         "metric": "r2",
         "seed": seed,

From 5bd061f354418064a8e4df746d682dafc0bc0420 Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 12 May 2022 15:02:09 -0400
Subject: [PATCH 45/50] add pytorch backend support

---
 flaml/model.py                       | 50 +++++++++++++++-------
 flaml/nlp/utils.py                   | 30 +++++++++----
 test/nlp/test_multimodalestimator.py | 63 ++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+), 24 deletions(-)

diff --git a/flaml/model.py b/flaml/model.py
index 3707db4804..c7083ce84a 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2159,14 +2159,14 @@ def search_space(cls, **params):
     def _init_ag_args(self, automl_fit_kwargs: dict = None):
         from .nlp.utils import AGArgs
 
-        ag_args = AGArgs()
-        for key, val in automl_fit_kwargs["ag_args"].items():
-            assert (
-                key in ag_args.__dict__
-            ), "The specified key {} is not in the argument list of flaml.nlp.utils::AGArgs".format(
-                key
-            )
-            setattr(ag_args, key, val)
+        ag_args = AGArgs(**automl_fit_kwargs["ag_args"])
+        # for key, val in automl_fit_kwargs["ag_args"].items():
+        #     assert (
+        #         key in ag_args.__dict__
+        #     ), "The specified key {} is not in the argument list of flaml.nlp.utils::AGArgs".format(
+        #         key
+        #     )
+        #     setattr(ag_args, key, val)
         self.ag_args = ag_args
 
     def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
@@ -2176,16 +2176,34 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         self._init_ag_args(kwargs)
         seed = self._kwargs.get("seed", 123)
 
-        assert (self.ag_args.backend == "mxnet"), "the pytorch automm model is not supported. "
         # get & set the hyperparameters, update with self.params
         hyperparameters = self.ag_args.hyperparameters
-        search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"]
-        for key, value in self.params.items():
-            # NOTE: FLAML uses np.float64 but AG uses float, need to transform
-            if key == "n_jobs": 
-                continue
-            else:
-                search_space[key] = value.item() if isinstance(value, np.float64) else value
+        if self.ag_args.backend == "mxnet":
+            search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"]
+            for key, value in self.params.items():
+                # NOTE: FLAML uses np.float64 but AG uses float, need to transform
+                if key == "n_jobs": 
+                    continue
+                else:
+                    search_space[key] = value.item() if isinstance(value, np.float64) else value
+        # elif using pytorch backend
+        else:
+            # TODO: if pytorch only, remove this mapper and modify the search space keys directly
+            # then AGargs in utils.py should be modify accordingly
+            KEY_MAPPER = {
+                "model.network.agg_net.mid_units": "model.fusion_mlp.hidden_sizes",
+                "optimization.lr": "optimization.learning_rate",
+                "optimization.wd": "optimization.weight_decay",
+                "optimization.warmup_portion": "warmup_steps",
+            }
+            for key, value in self.params.items():
+                if key == "n_jobs":
+                    continue
+                elif key == "model.network.agg_net.mid_units":
+                    hyperparameters[KEY_MAPPER[key]] = [value]
+                else:
+                    hyperparameters[key] = value.item() if isinstance(value, np.float64) else value
+
         start_time = time.time()
         self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id)
         assert self._task in MM_TASKS, f"The task is not multimodal, but {self._task}. "
diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index 9c7b68d136..6ed52d11bc 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -491,7 +491,8 @@ class AGArgs:
         default="data/mm/output/", metadata={"help": "data dir", "required": True}
     )
     backend: str = field(default="mxnet", metadata={"help": "the backend of the multimodal model"})
-    text_backbone: str = field(default="electra_base", metadata={"help": "text backbone model"})
+    text_backbone: str = field(default="electra_base", metadata={"help": "mxnet text backbone model"})
+    hf_model_checkpoint_name: str = field(default="google/electra-base-discriminator", metadata={"help": "HF model"})
     multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"})
     per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"})
     num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"})
@@ -504,14 +505,27 @@ def __post_init__(self):
         Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html
         """
         from autogluon.text.text_prediction.legacy_presets import ag_text_presets
+        from autogluon.text.text_prediction.presets import get_text_preset
+
+        if self.backend == "mxnet":
+            base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}'
+            self.hyperparameters = ag_text_presets.create(base_key)
+            # NOTE: set batch & epoch
+            search_space = self.hyperparameters["models"]["MultimodalTextModel"]["search_space"]
+            search_space["optimization.per_device_batch_size"] = self.per_device_batch_size
+            search_space["optimization.batch_size"] = self.batch_size
+            search_space["optimization.num_train_epochs"] = self.num_train_epochs
+        elif self.backend == "pytorch":
+            # get the override from the text preset tuple
+            self.hyperparameters = get_text_preset("default")[1]
+
+            self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_checkpoint_name
+            self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size
+            self.hyperparameters["env.batch_size"] = self.batch_size
+            self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs
 
-        base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}'
-        self.hyperparameters = ag_text_presets.create(base_key)
-        # NOTE: set batch & epoch
-        search_space = self.hyperparameters["models"]["MultimodalTextModel"]["search_space"]
-        search_space["optimization.per_device_batch_size"] = self.per_device_batch_size
-        search_space["optimization.batch_size"] = self.batch_size
-        search_space["optimization.num_train_epochs"] = self.num_train_epochs
+        else:
+            raise ValueError(f"No {self.backend} backend, please choose mxnet or pytorch.")
 
     @staticmethod
     def load_args():
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 63f7f7bc50..9f27f381e8 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -70,3 +70,66 @@ def test_multimodalestimator():
     print("Try to run inference on validation set")
     score = automl.score(valid_dataset[feature_columns], valid_dataset["label"])
     print(f"Inference on validation set complete, {metric}: {score}")
+
+
+def test_pytorch_backend():
+    if sys.version < "3.7":
+        # do not test on python3.6
+        return
+    elif platform.system() == "Windows":
+        # do not test on windows with py3.8
+        return
+    seed = 123
+    metric = "r2"
+    train_data = {
+        "sentence1": [
+            "Mary had a little lamb.",
+            "Its fleece was white as snow."
+        ],
+        "numerical1": [1, 2],
+        "label": [1, 2],
+    }
+
+    valid_data = {
+        "sentence1": [
+            "Mary had a little lamb.",
+            "Its fleece was white as snow."
+        ],
+        "numerical1": [1, 2],
+        "label": [1, 2],
+    }
+    train_dataset = pd.DataFrame(train_data)
+    valid_dataset = pd.DataFrame(valid_data)
+    
+    feature_columns = ["sentence1", "numerical1"]
+    automl = AutoML()
+    automl_settings = {
+        "gpu_per_trial": 0,
+        "max_iter": 2,
+        "time_budget": 30,
+        "task": "mm-regression",
+        "metric": "r2",
+        "seed": seed,
+    }
+
+    automl_settings["ag_args"] = {
+        "output_dir": "test/ag_output/",
+        "backend": "pytorch",
+        "text_backbone": "google/electra-small-discriminator"
+    }
+
+    automl.fit(
+        X_train=train_dataset[feature_columns],
+        y_train=train_dataset["label"],
+        X_val=valid_dataset[feature_columns],
+        y_val=valid_dataset["label"],
+        eval_method="holdout",
+        auto_augment=False,
+        **automl_settings
+    )
+    automl.pickle("automl.pkl")
+    with open("automl.pkl", "rb") as f:
+        automl = pickle.load(f)
+    print("Try to run inference on validation set")
+    score = automl.score(valid_dataset[feature_columns], valid_dataset["label"])
+    print(f"Inference on validation set complete, {metric}: {score}")

From 2b150e71e04d06758bfe877749bf195006a982aa Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 19 May 2022 12:07:55 -0400
Subject: [PATCH 46/50] set pytorch backend to default

---
 flaml/nlp/utils.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index 6ed52d11bc..c75051b146 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -490,7 +490,7 @@ class AGArgs:
     output_dir: str = field(
         default="data/mm/output/", metadata={"help": "data dir", "required": True}
     )
-    backend: str = field(default="mxnet", metadata={"help": "the backend of the multimodal model"})
+    backend: str = field(default="pytorch", metadata={"help": "the backend of the multimodal model"})
     text_backbone: str = field(default="electra_base", metadata={"help": "mxnet text backbone model"})
     hf_model_checkpoint_name: str = field(default="google/electra-base-discriminator", metadata={"help": "HF model"})
     multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"})
@@ -504,10 +504,20 @@ def __post_init__(self):
         Get the preset using the AGArgs. Save as self.hyperparameters.
         Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html
         """
-        from autogluon.text.text_prediction.legacy_presets import ag_text_presets
-        from autogluon.text.text_prediction.presets import get_text_preset
+        if self.backend == "pytorch":
+            from autogluon.text.text_prediction.presets import get_text_preset
+            
+            # get the override from the text preset tuple
+            self.hyperparameters = get_text_preset("default")[1]
+
+            self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_checkpoint_name
+            self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size
+            self.hyperparameters["env.batch_size"] = self.batch_size
+            self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs
+        
+        elif self.backend == "mxnet":
+            from autogluon.text.text_prediction.legacy_presets import ag_text_presets
 
-        if self.backend == "mxnet":
             base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}'
             self.hyperparameters = ag_text_presets.create(base_key)
             # NOTE: set batch & epoch
@@ -515,14 +525,6 @@ def __post_init__(self):
             search_space["optimization.per_device_batch_size"] = self.per_device_batch_size
             search_space["optimization.batch_size"] = self.batch_size
             search_space["optimization.num_train_epochs"] = self.num_train_epochs
-        elif self.backend == "pytorch":
-            # get the override from the text preset tuple
-            self.hyperparameters = get_text_preset("default")[1]
-
-            self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_checkpoint_name
-            self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size
-            self.hyperparameters["env.batch_size"] = self.batch_size
-            self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs
 
         else:
             raise ValueError(f"No {self.backend} backend, please choose mxnet or pytorch.")

From 505c89412d186ecd89038152cad1fa014b31fddb Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 19 May 2022 15:15:24 -0400
Subject: [PATCH 47/50] pytorch backend support only

---
 flaml/model.py                       | 60 ++++++----------------
 flaml/nlp/utils.py                   | 46 +++++------------
 setup.py                             |  1 -
 test/nlp/test_multimodalestimator.py | 77 ++--------------------------
 4 files changed, 31 insertions(+), 153 deletions(-)

diff --git a/flaml/model.py b/flaml/model.py
index c7083ce84a..c1ce3e95c4 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2137,72 +2137,42 @@ def search_space(cls, **params):
         https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values
         """
         search_space_dict = {
-            "model.network.agg_net.mid_units": {
+            "model.fusion_mlp.hidden_sizes": {
                 "domain": tune.choice(list(range(32, 129))),
                 "init_value": 128,
             },
-            "optimization.lr": {
+            "optimization.learning_rate": {
                 "domain": tune.loguniform(lower=1E-5, upper=1E-4),
                 "init_value": 1E-4,
             },
-            "optimization.wd": {
+            "optimization.weight_decay": {
                 "domain": tune.choice([1E-4, 1E-3, 1E-2]),
                 "init_value": 1E-4,
             },
-            "optimization.warmup_portion": {
+            "optimization.warmup_steps": {
                 "domain": tune.choice([0.1, 0.2]),
                 "init_value": 0.1, 
             },
         }
         return search_space_dict
 
-    def _init_ag_args(self, automl_fit_kwargs: dict = None):
-        from .nlp.utils import AGArgs
-
-        ag_args = AGArgs(**automl_fit_kwargs["ag_args"])
-        # for key, val in automl_fit_kwargs["ag_args"].items():
-        #     assert (
-        #         key in ag_args.__dict__
-        #     ), "The specified key {} is not in the argument list of flaml.nlp.utils::AGArgs".format(
-        #         key
-        #     )
-        #     setattr(ag_args, key, val)
-        self.ag_args = ag_args
-
     def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
         from autogluon.text import TextPredictor
+        from .nlp.utils import AGArgs
 
         self._kwargs = kwargs
-        self._init_ag_args(kwargs)
+        self.ag_args = AGArgs(**kwargs["ag_args"])
         seed = self._kwargs.get("seed", 123)
 
         # get & set the hyperparameters, update with self.params
         hyperparameters = self.ag_args.hyperparameters
-        if self.ag_args.backend == "mxnet":
-            search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"]
-            for key, value in self.params.items():
-                # NOTE: FLAML uses np.float64 but AG uses float, need to transform
-                if key == "n_jobs": 
-                    continue
-                else:
-                    search_space[key] = value.item() if isinstance(value, np.float64) else value
-        # elif using pytorch backend
-        else:
-            # TODO: if pytorch only, remove this mapper and modify the search space keys directly
-            # then AGargs in utils.py should be modify accordingly
-            KEY_MAPPER = {
-                "model.network.agg_net.mid_units": "model.fusion_mlp.hidden_sizes",
-                "optimization.lr": "optimization.learning_rate",
-                "optimization.wd": "optimization.weight_decay",
-                "optimization.warmup_portion": "warmup_steps",
-            }
-            for key, value in self.params.items():
-                if key == "n_jobs":
-                    continue
-                elif key == "model.network.agg_net.mid_units":
-                    hyperparameters[KEY_MAPPER[key]] = [value]
-                else:
-                    hyperparameters[key] = value.item() if isinstance(value, np.float64) else value
+        for key, value in self.params.items():
+            if key == "n_jobs":
+                continue
+            elif key == "model.fusion_mlp.hidden_sizes":
+                hyperparameters[key] = [value]
+            else:
+                hyperparameters[key] = value.item() if isinstance(value, np.float64) else value
 
         start_time = time.time()
         self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id)
@@ -2211,7 +2181,7 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs):
                               label="label",
                               problem_type=self._task[3:],
                               eval_metric=kwargs["metric"],
-                              backend=self.ag_args.backend,
+                              backend="pytorch",
                               verbosity=0)
         train_data = BaseEstimator._join(X_train, y_train)
         # use valid data for early stopping
@@ -2253,7 +2223,7 @@ def predict_proba(self, X):
     def score(self, X_val: DataFrame, y_val: Series, **kwargs):
         from autogluon.text import TextPredictor
 
-        model = TextPredictor.load(path=self.model_path, backend=self.ag_args.backend)
+        model = TextPredictor.load(path=self.model_path, backend="pytorch")
         val_data = BaseEstimator._join(X_val, y_val)
         return model.evaluate(val_data)
         
diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index c75051b146..ad77f71034 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -483,17 +483,13 @@ class AGArgs:
     The Autogluon configurations
     Args:
         output_dir (str): data root directory for outputing the log and intermediate data, model.
-        backend (str, optional, defaults to "mxnet"): currently only support to mxnet.
-        text_backbone (str, optional, defaults to "electra_base"): the text backbone model.
-        multimodal_fusion_strategy (str, optional, defaults to "fuse_late"): the fuse strategy.
+        hf_model_path (str, optional, defaults to "google/electra-base-discriminator"): the Hugging Face model checkpoint.
+        per_device_batch_size (int, optional, defaults to 8)
+        num_train_epochs (int, optional, defaults to 10)
+        batch_size (int, optional, defaults to 128)
     """
-    output_dir: str = field(
-        default="data/mm/output/", metadata={"help": "data dir", "required": True}
-    )
-    backend: str = field(default="pytorch", metadata={"help": "the backend of the multimodal model"})
-    text_backbone: str = field(default="electra_base", metadata={"help": "mxnet text backbone model"})
-    hf_model_checkpoint_name: str = field(default="google/electra-base-discriminator", metadata={"help": "HF model"})
-    multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"})
+    output_dir: str = field(default="data/mm_output/", metadata={"help": "data dir", "required": True})
+    hf_model_path: str = field(default="google/electra-base-discriminator", metadata={"help": "Hugging Face model path"})
     per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"})
     num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"})
     batch_size: int = field(default=128,  metadata={"help": "batch size"})
@@ -502,32 +498,16 @@ class AGArgs:
     def __post_init__(self):
         """
         Get the preset using the AGArgs. Save as self.hyperparameters.
-        Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html
         """
-        if self.backend == "pytorch":
-            from autogluon.text.text_prediction.presets import get_text_preset
-            
-            # get the override from the text preset tuple
-            self.hyperparameters = get_text_preset("default")[1]
-
-            self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_checkpoint_name
-            self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size
-            self.hyperparameters["env.batch_size"] = self.batch_size
-            self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs
+        from autogluon.text.text_prediction.presets import get_text_preset
         
-        elif self.backend == "mxnet":
-            from autogluon.text.text_prediction.legacy_presets import ag_text_presets
-
-            base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}'
-            self.hyperparameters = ag_text_presets.create(base_key)
-            # NOTE: set batch & epoch
-            search_space = self.hyperparameters["models"]["MultimodalTextModel"]["search_space"]
-            search_space["optimization.per_device_batch_size"] = self.per_device_batch_size
-            search_space["optimization.batch_size"] = self.batch_size
-            search_space["optimization.num_train_epochs"] = self.num_train_epochs
+        # get the override from the text preset tuple
+        self.hyperparameters = get_text_preset("default")[1]
 
-        else:
-            raise ValueError(f"No {self.backend} backend, please choose mxnet or pytorch.")
+        self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_path
+        self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size
+        self.hyperparameters["env.batch_size"] = self.batch_size
+        self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs
 
     @staticmethod
     def load_args():
diff --git a/setup.py b/setup.py
index 24a622b01f..73a2ef2abb 100644
--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,6 @@
             "seqeval",
         ],
         "autogluon": [
-            "mxnet<2.0.0",
             "autogluon.text==0.4.0",
             "autogluon.features==0.4.0",
         ],
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 9f27f381e8..4a0d14f454 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -10,77 +10,6 @@
 
 
 def test_multimodalestimator():
-    if sys.version < "3.7":
-        # do not test on python3.6
-        return
-    elif platform.system() == "Windows":
-        # do not test on windows with py3.8
-        return
-
-    seed = 123
-    metric = "r2"
-    train_data = {
-        "sentence1": [
-            "Amrozi accused his brother of deliberately distorting his evidence.",
-        ],
-        "numerical1": [1],
-        "label": [1],
-    }
-
-    valid_data = {
-        "sentence1": [
-            "They had published an advertisement on the Internet on June 10.",
-        ],
-        "numerical1": [1],
-        "label": [1],
-    }
-    train_dataset = pd.DataFrame(train_data)
-    valid_dataset = pd.DataFrame(valid_data)
-    
-    feature_columns = ["sentence1", "numerical1"]
-    automl = AutoML()
-    automl_settings = {
-        "gpu_per_trial": 0,
-        "max_iter": 2,
-        "time_budget": 20,
-        "task": "mm-regression",
-        "metric": "r2",
-        "seed": seed,
-    }
-
-    automl_settings["ag_args"] = {
-        "output_dir": "test/ag_output/",
-        "backend": "mxnet",
-        "text_backbone": "electra_small",
-        "multimodal_fusion_strategy": "fuse_late",
-    }
-
-    automl.fit(
-        X_train=train_dataset[feature_columns],
-        y_train=train_dataset["label"],
-        X_val=valid_dataset[feature_columns],
-        y_val=valid_dataset["label"],
-        eval_method="holdout",
-        auto_augment=False,
-        **automl_settings
-    )
-    automl.pickle("automl.pkl")
-    with open("automl.pkl", "rb") as f:
-        automl = pickle.load(f)
-    print("Try to run inference on validation set")
-    score = automl.score(valid_dataset[feature_columns], valid_dataset["label"])
-    print(f"Inference on validation set complete, {metric}: {score}")
-
-
-def test_pytorch_backend():
-    if sys.version < "3.7":
-        # do not test on python3.6
-        return
-    elif platform.system() == "Windows":
-        # do not test on windows with py3.8
-        return
-    seed = 123
-    metric = "r2"
     train_data = {
         "sentence1": [
             "Mary had a little lamb.",
@@ -102,6 +31,7 @@ def test_pytorch_backend():
     valid_dataset = pd.DataFrame(valid_data)
     
     feature_columns = ["sentence1", "numerical1"]
+    metric = "r2"
     automl = AutoML()
     automl_settings = {
         "gpu_per_trial": 0,
@@ -109,13 +39,12 @@ def test_pytorch_backend():
         "time_budget": 30,
         "task": "mm-regression",
         "metric": "r2",
-        "seed": seed,
+        "seed": 123,
     }
 
     automl_settings["ag_args"] = {
         "output_dir": "test/ag_output/",
-        "backend": "pytorch",
-        "text_backbone": "google/electra-small-discriminator"
+        "hf_model_path": "google/electra-small-discriminator"
     }
 
     automl.fit(

From 98ee13855d421e4453c7f97db0c1953300efac01 Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 19 May 2022 16:11:16 -0400
Subject: [PATCH 48/50] test remove os and python ver constraints

---
 .github/workflows/python-package.yml | 3 +--
 flaml/automl.py                      | 4 ++--
 flaml/data.py                        | 6 +++---
 flaml/model.py                       | 4 ++--
 flaml/nlp/utils.py                   | 4 ++--
 test/nlp/test_multimodalestimator.py | 2 +-
 6 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 3179338ffc..a1dd082fad 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -47,8 +47,7 @@ jobs:
         run: |
           pip install -e .[ray,forecast]
           pip install 'tensorboardX<=2.2'
-      - name: If python version > 3.6 and not on windows, install autogluon
-        if: matrix.python-version >= '3.7' && (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest')
+      - name: Install autogluon
         run: |
           pip install -e .[autogluon]
       - name: Lint with flake8
diff --git a/flaml/automl.py b/flaml/automl.py
index 8a6864e8f3..b641b189cb 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -1692,7 +1692,7 @@ def _decide_split_type(self, split_type):
                 len(np.unique(self._y_train_all))
             )
         elif self._state.task == "mm-classification":
-             self._state.task = "mm-" + get_classification_objective(
+            self._state.task = "mm-" + get_classification_objective(
                 len(np.unique(self._y_train_all))
             )
         if not isinstance(split_type, str):
@@ -2457,7 +2457,7 @@ def is_to_reverse_metric(metric, task):
                 estimator_list = ["transformer"]
             # NOTE: if multimodal task, use multimodal estimator
             elif self._state.task in MM_TASKS:
-                estimator_list=["multimodal"]
+                estimator_list = ["multimodal"]
             else:
                 try:
                     import catboost
diff --git a/flaml/data.py b/flaml/data.py
index 0cadead32a..6b451e805e 100644
--- a/flaml/data.py
+++ b/flaml/data.py
@@ -50,9 +50,9 @@
     TOKENCLASSIFICATION,
 )
 MM_TASKS = (
-    "mm-classification", 
-    "mm-regression", 
-    "mm-binary", 
+    "mm-classification",
+    "mm-regression",
+    "mm-binary",
     "mm-multiclass",)
 
 
diff --git a/flaml/model.py b/flaml/model.py
index 08a4be47e4..9ee4f5dafb 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -2167,7 +2167,7 @@ def search_space(cls, **params):
             },
             "optimization.warmup_steps": {
                 "domain": tune.choice([0.1, 0.2]),
-                "init_value": 0.1, 
+                "init_value": 0.1,
             },
         }
         return search_space_dict
@@ -2242,7 +2242,7 @@ def score(self, X_val: DataFrame, y_val: Series, **kwargs):
         model = TextPredictor.load(path=self.model_path, backend="pytorch")
         val_data = BaseEstimator._join(X_val, y_val)
         return model.evaluate(val_data)
-        
+
 
 class suppress_stdout_stderr(object):
     def __init__(self):
diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py
index 87dfa5a930..427f91fe5a 100644
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@@ -494,7 +494,7 @@ class AGArgs:
     hf_model_path: str = field(default="google/electra-base-discriminator", metadata={"help": "Hugging Face model path"})
     per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"})
     num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"})
-    batch_size: int = field(default=128,  metadata={"help": "batch size"})
+    batch_size: int = field(default=128, metadata={"help": "batch size"})
     hyperparameters: dict = field(init=False)
 
     def __post_init__(self):
@@ -502,7 +502,7 @@ def __post_init__(self):
         Get the preset using the AGArgs. Save as self.hyperparameters.
         """
         from autogluon.text.text_prediction.presets import get_text_preset
-        
+
         # get the override from the text preset tuple
         self.hyperparameters = get_text_preset("default")[1]
 
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index 4a0d14f454..f50f63cf22 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -29,7 +29,7 @@ def test_multimodalestimator():
     }
     train_dataset = pd.DataFrame(train_data)
     valid_dataset = pd.DataFrame(valid_data)
-    
+
     feature_columns = ["sentence1", "numerical1"]
     metric = "r2"
     automl = AutoML()

From ff8c078f2f595a271b2094cc15343310f17e94df Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 19 May 2022 16:29:26 -0400
Subject: [PATCH 49/50] no support for python 3.6

---
 .github/workflows/python-package.yml | 3 ++-
 test/nlp/test_multimodalestimator.py | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index a1dd082fad..3404d1824b 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -47,7 +47,8 @@ jobs:
         run: |
           pip install -e .[ray,forecast]
           pip install 'tensorboardX<=2.2'
-      - name: Install autogluon
+      - name: If python version > 3.6, install autogluon
+        if: matrix.python-version >= '3.7'
         run: |
           pip install -e .[autogluon]
       - name: Lint with flake8
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index f50f63cf22..f8888d2576 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -10,6 +10,9 @@
 
 
 def test_multimodalestimator():
+    if sys.version_info < (3, 7):  # tuple compare; as strings "3.10" < "3.7"
+        # do not test on python3.6
+        return
     train_data = {
         "sentence1": [
             "Mary had a little lamb.",

From 24a5333dd21ffebdb9877ab429921866754b4444 Mon Sep 17 00:00:00 2001
From: Varia <Varia@VariadeMacBook-Pro.local>
Date: Thu, 19 May 2022 16:59:44 -0400
Subject: [PATCH 50/50] no support for python 3.6 or windows

---
 .github/workflows/python-package.yml | 4 ++--
 test/nlp/test_multimodalestimator.py | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 3404d1824b..3179338ffc 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -47,8 +47,8 @@ jobs:
         run: |
           pip install -e .[ray,forecast]
           pip install 'tensorboardX<=2.2'
-      - name: If python version > 3.6, install autogluon
-        if: matrix.python-version >= '3.7'
+      - name: If python version > 3.6 and not on windows, install autogluon
+        if: matrix.python-version >= '3.7' && (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest')
         run: |
           pip install -e .[autogluon]
       - name: Lint with flake8
diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py
index f8888d2576..b80bf41138 100644
--- a/test/nlp/test_multimodalestimator.py
+++ b/test/nlp/test_multimodalestimator.py
@@ -13,6 +13,9 @@ def test_multimodalestimator():
     if sys.version_info < (3, 7):  # tuple compare; as strings "3.10" < "3.7"
         # do not test on python3.6
         return
+    elif platform.system() == "Windows":
+        # do not test on windows
+        return
     train_data = {
         "sentence1": [
             "Mary had a little lamb.",