From 0a0a4c6ab7e43c569d3a2be62a1bf0c35ca14dcf Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 15 Mar 2022 15:02:36 -0400 Subject: [PATCH 01/50] Change readme to trigger test --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ad2294aa62..cc45a701b1 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Downloads](https://pepy.tech/badge/flaml)](https://pepy.tech/project/flaml) [![Join the chat at https://gitter.im/FLAMLer/community](https://badges.gitter.im/FLAMLer/community.svg)](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -# A Fast Library for Automated Machine Learning & Tuning +# A Fast Library for Automated Machine Learning & Tuning.

From 002683f3d6df2258316ad3b68afe6e8d564104eb Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 15 Mar 2022 15:31:41 -0400 Subject: [PATCH 02/50] add dependencies for AG --- .github/workflows/python-package.yml | 4 ++++ setup.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cebff3fbe1..3404d1824b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -47,6 +47,10 @@ jobs: run: | pip install -e .[ray,forecast] pip install 'tensorboardX<=2.2' + - name: If python version > 3.6, install autogluon + if: matrix.python-version >= '3.7' + run: | + pip install -e .[autogluon] - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names diff --git a/setup.py b/setup.py index 907f1fe50f..f90b427fad 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,11 @@ "hcrystalball==0.1.10", "seqeval", ], + "autogluon": [ + "mxnet<2.0.0", + "autogluon.text==0.4.0", + "autogluon.features==0.4.0", + ], "catboost": ["catboost>=0.26"], "blendsearch": ["optuna==2.8.0"], "ray": [ From 60a847c87eed5edb7b5761bd9f3126b3e3ee9727 Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 15 Mar 2022 16:29:30 -0400 Subject: [PATCH 03/50] add user permission to test_notebook_example L81 --- flaml/model.py | 155 +++++++++++++++++++++++++++ setup.py | 4 +- test/automl/test_notebook_example.py | 2 +- 3 files changed, 158 insertions(+), 3 deletions(-) diff --git a/flaml/model.py b/flaml/model.py index 9e6c20a1a3..d9f4700ce3 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -1965,6 +1965,161 @@ class XGBoostLimitDepth_TS(TS_SKLearn): base_class = XGBoostLimitDepthEstimator +class AGTextPredictorEstimator(BaseEstimator): + """ + The class for tuning AutoGluon TextPredictor + """ + def __init__(self, task="binary", **params,): + from autogluon.text.text_prediction.mx_predictor import MXTextPredictor + + super().__init__(task, **params) + self.estimator_class = MXTextPredictor + + @classmethod + def search_space(cls, **params): + # Add the possible search space configs here, e.g. 'optimization.lr' + # reference: + # https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values + search_space_dict = { + 'model.network.agg_net.mid_units': { + "domain": tune.choice(list(range(32, 129))), + "init_value": 128 + }, + 'optimization.lr': { + "domain": tune.loguniform(lower=1E-5, upper=1E-4), + "init_value": 1E-4, + }, + 'optimization.wd':{ + "domain": tune.choice([1E-4, 1E-3, 1E-2]), + "init_value":1E-4 + }, + 'optimization.warmup_portion': { + "domain": tune.choice([0.1, 0.2]), + "init_value":0.1, + }, + } + return search_space_dict + + def _init_fix_args(self, automl_fit_kwargs: dict=None): + """ + Save the customed fix args here + this includes: + "output_dir", + "text_backbone": "electra_base" + "multimodal_fusion_strategy":"fuse_late", + """ + fix_args = {} + FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size", + "text_backbone", "multimodal_fusion_strategy", ] + for key, value in automl_fit_kwargs["custom_fix_args"].items(): + assert ( + key in FIX_ARGS_LIST + ), "The specified key {} is not in the argument list: output_dir, label_column, dataset_name, text_backbone,\ + multimodal_fusion_strategy".format(key) + + fix_args[key] = value + + self.fix_args = fix_args + + def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str): + + """" + Ref: + https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values + """ + from autogluon.text.text_prediction.legacy_presets import ag_text_presets + + base_key = f'{text_backbone}_{multimodal_fusion_strategy}' + cfg = ag_text_presets.create(base_key) + # NOTE: if the search_space() is modified, add new items or delete here too. + TUNABLE_HP = set(['model.network.agg_net.mid_units', + 'optimization.batch_size', + 'optimization.layerwise_lr_decay', + 'optimization.lr', + 'optimization.nbest', + 'optimization.num_train_epochs', + 'optimization.per_device_batch_size', + 'optimization.wd', + 'optimization.warmup_portion', + ]) + search_space = cfg['models']['MultimodalTextModel']['search_space'] + for key, value in self.params.items(): + if key in TUNABLE_HP: + # NOTE: FLAML uses np.float64 but AG uses float, need to transform + if isinstance(value, np.float64): + search_space[key] = value.item() + else: + search_space[key] = value + search_space['optimization.per_device_batch_size'] = self.fix_args['per_device_batch_size'] + return cfg + + def _set_seed(self, seed): + import random + import mxnet as mx + import torch as th + th.manual_seed(seed) + mx.random.seed(seed) + np.random.seed(seed) + random.seed(seed) + + def fit(self, X_train=None, y_train=None, budget=None, **kwargs): + self._kwargs = kwargs + self._init_fix_args(kwargs) + # the seed set in the bash script for ag experiment is 123 + seed = self.params.get("seed", 123) + self._set_seed(seed) + + # get backbone and fusion strategy + text_backbone=self.fix_args["text_backbone"] + multimodal_fusion_strategy=self.fix_args["multimodal_fusion_strategy"] + + # get & set the save dir, get the dataset info + save_dir = self.fix_args["output_dir"] + label_column = self.fix_args["label_column"] + dataset_name = self.fix_args["dataset_name"] + ag_model_save_dir = os.path.join(save_dir, f"{dataset_name}_ag_text_multimodal_{text_backbone}\ + _{multimodal_fusion_strategy}_no_ensemble") + + # set the of the hyperparameters + self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy) + PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression"} + TASK_METRIC_MAPPING = {"multi": "acc", "binary": "roc_auc", "regression": "r2"} + + # train the model + start_time = time.time() + + self._model = self.estimator_class(path=save_dir, + label=label_column, + problem_type=PROBLEM_TYPE_MAPPING[self._task], + eval_metric=TASK_METRIC_MAPPING[self._task]) + + train_data = self._kwargs["train_data"] + + self._model.fit(train_data=train_data, + hyperparameters=self.hyperparameters, + time_limit=budget, + seed=seed) + + training_time = time.time() - start_time + return training_time + + def predict(self, X, as_pandas=False): + output = self._model.predict(self._kwargs["valid_data"], as_pandas=as_pandas) + return output + + + def predict_proba(self, X, as_pandas=False, as_multiclass=True): + # only works for classification tasks + assert ( + self._task in CLASSIFICATION + ), "predict_proba() only for classification tasks." + + output = self._model.predict_proba(self._kwargs["valid_data"], as_pandas=as_pandas) + if not as_multiclass: + if self._task == "binary": + output = output[:, 1] + return output + class suppress_stdout_stderr(object): def __init__(self): # Open a pair of null files diff --git a/setup.py b/setup.py index f90b427fad..24a622b01f 100644 --- a/setup.py +++ b/setup.py @@ -65,9 +65,9 @@ ], "autogluon": [ "mxnet<2.0.0", - "autogluon.text==0.4.0", + "autogluon.text==0.4.0", "autogluon.features==0.4.0", - ], + ], "catboost": ["catboost>=0.26"], "blendsearch": ["optuna==2.8.0"], "ray": [ diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py index 1afb569eb2..adcfdf3298 100644 --- a/test/automl/test_notebook_example.py +++ b/test/automl/test_notebook_example.py @@ -78,7 +78,7 @@ def test_mlflow(): import subprocess import sys - subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"]) + subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow", "--user"]) import mlflow from flaml.data import load_openml_task From 60a9e2707c6d84b3ea9dd08b7d2d48eb288c4bb1 Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 15 Mar 2022 20:09:26 -0400 Subject: [PATCH 04/50] add mlflow dependency to setup --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 24a622b01f..0f7b19f9c5 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ "rouge_score", "hcrystalball==0.1.10", "seqeval", + "mlflow", ], "autogluon": [ "mxnet<2.0.0", From bc7f38db3bc1b922c16e22fe9af685ba43c5818b Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 09:31:14 -0400 Subject: [PATCH 05/50] add textpredictor estimator and test --- README.md | 2 +- flaml/ml.py | 3 + flaml/model.py | 9 +- test/automl/test_notebook_example.py | 2 +- test/test_agtextpredictor.py | 132 +++++++++++++++++++++++++++ 5 files changed, 141 insertions(+), 7 deletions(-) create mode 100644 test/test_agtextpredictor.py diff --git a/README.md b/README.md index cc45a701b1..ad2294aa62 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Downloads](https://pepy.tech/badge/flaml)](https://pepy.tech/project/flaml) [![Join the chat at https://gitter.im/FLAMLer/community](https://badges.gitter.im/FLAMLer/community.svg)](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -# A Fast Library for Automated Machine Learning & Tuning. +# A Fast Library for Automated Machine Learning & Tuning

diff --git a/flaml/ml.py b/flaml/ml.py index 146fe91acf..55256d3de2 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -37,6 +37,7 @@ ARIMA, SARIMAX, TransformersEstimator, + AGTextPredictorEstimator, ) from .data import CLASSIFICATION, group_counts, TS_FORECAST, TS_VALUE_COL import logging @@ -121,6 +122,8 @@ def get_estimator_class(task, estimator_name): estimator_class = SARIMAX elif estimator_name == "transformer": estimator_class = TransformersEstimator + elif estimator_name == "agtextpredictor": + estimator_class = AGTextPredictorEstimator else: raise ValueError( estimator_name + " is not a built-in learner. " diff --git a/flaml/model.py b/flaml/model.py index d9f4700ce3..54c2abf472 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2103,18 +2103,17 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): training_time = time.time() - start_time return training_time - def predict(self, X, as_pandas=False): - output = self._model.predict(self._kwargs["valid_data"], as_pandas=as_pandas) + def predict(self, X): + output = self._model.predict(self._kwargs["valid_data"], as_pandas=False) return output - - def predict_proba(self, X, as_pandas=False, as_multiclass=True): + def predict_proba(self, X, as_multiclass=True): # only works for classification tasks assert ( self._task in CLASSIFICATION ), "predict_proba() only for classification tasks." - output = self._model.predict_proba(self._kwargs["valid_data"], as_pandas=as_pandas) + output = self._model.predict_proba(self._kwargs["valid_data"], as_pandas=False) if not as_multiclass: if self._task == "binary": output = output[:, 1] diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py index adcfdf3298..1afb569eb2 100644 --- a/test/automl/test_notebook_example.py +++ b/test/automl/test_notebook_example.py @@ -78,7 +78,7 @@ def test_mlflow(): import subprocess import sys - subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow", "--user"]) + subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"]) import mlflow from flaml.data import load_openml_task diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py new file mode 100644 index 0000000000..0ddec3d1d3 --- /dev/null +++ b/test/test_agtextpredictor.py @@ -0,0 +1,132 @@ +from flaml import AutoML +import pandas as pd +import requests +import sklearn +import numpy as np +import os +import sys +import json +from sklearn.model_selection import train_test_split +os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" + +def default_holdout_frac(num_train_rows, hyperparameter_tune=False): + """ + Returns default holdout_frac used in fit(). + Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. + Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 + """ + if num_train_rows < 5000: + holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) + else: + holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) + + if hyperparameter_tune: + holdout_frac = min(0.2, holdout_frac * 2) # We want to allocate more validation data for HPO to avoid overfitting + + return holdout_frac + +def test_ag_text_predictor(): + if sys.version < "3.7": + # do not test on python3.6 + return + + seed = 123 + metric = "roc_auc" + problem_type = "binary" + train_data = { + "sentence1": [ + 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', + "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", + "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", + "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", + "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", + "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", + "The DVD-CCA then appealed to the state Supreme Court .", + ], + "sentence2": [ + 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', + "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", + "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", + "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", + "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", + "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", + "The DVD CCA appealed that decision to the U.S. Supreme Court .", + ], + "numerical1":[1, 2, 3, 4, 5, 6, 7, 8], + "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], + "label": [1, 0, 1, 0, 1, 1, 0, 1], + "idx": [0, 1, 2, 3, 4, 5, 6, 7], + } + train_dataset = pd.DataFrame(train_data) + + test_data = { + "sentence1": [ + "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", + "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", + "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", + "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", + ], + "sentence2": [ + "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", + "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", + "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", + "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", + ], + "numerical1":[3, 4, 5, 6], + "categorical1": ["b", "a", "a", "b"], + "label": [0, 1, 1, 0], + "idx": [8, 10, 11, 12], + } + test_dataset = pd.DataFrame(test_data) + + # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR + holdout_frac = default_holdout_frac(len(train_dataset), False) + + _, valid_dataset = train_test_split(train_dataset, + test_size=holdout_frac, + random_state=np.random.RandomState(seed)) + + feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] + + automl = AutoML() + automl_settings = { + "gpu_per_trial": 0, + "max_iter": 3, + "time_budget": 50, + "task": "binary", + "metric": "roc_auc", + } + + automl_settings["custom_fix_args"] = { + "output_dir": "test/data/output/", + "text_backbone": "electra_base", + "multimodal_fusion_strategy": "fuse_late", + "dataset_name": "test_ag", + "label_column": "label", + "per_device_batch_size": 4, + } + + try: + automl.fit( + dataframe=train_dataset[feature_columns+["label"]], + label="label", + train_data=train_dataset[feature_columns+["label"]], + valid_data=valid_dataset[feature_columns+["label"]], + X_val=valid_dataset[feature_columns], + y_val=valid_dataset["label"], + estimator_list=["agtextpredictor"], + **automl_settings + ) + except requests.exceptions.HTTPError: + return + + print("Begin to run inference on test set") + save_dir = automl_settings["custom_fix_args"]["output_dir"] + score = automl.model.estimator.evaluate(test_dataset) + print(f"Inference on test set complete, {metric}: {score}") + + +if __name__ == "__main__": + test_ag_text_predictor() \ No newline at end of file From f9ca56ba06f5aa4bdb6978d2d51a216074e62181 Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 09:51:35 -0400 Subject: [PATCH 06/50] new estimator, no test file --- test/test_agtextpredictor.py | 132 ----------------------------------- 1 file changed, 132 deletions(-) delete mode 100644 test/test_agtextpredictor.py diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py deleted file mode 100644 index 0ddec3d1d3..0000000000 --- a/test/test_agtextpredictor.py +++ /dev/null @@ -1,132 +0,0 @@ -from flaml import AutoML -import pandas as pd -import requests -import sklearn -import numpy as np -import os -import sys -import json -from sklearn.model_selection import train_test_split -os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" - -def default_holdout_frac(num_train_rows, hyperparameter_tune=False): - """ - Returns default holdout_frac used in fit(). - Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. - Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 - """ - if num_train_rows < 5000: - holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) - else: - holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) - - if hyperparameter_tune: - holdout_frac = min(0.2, holdout_frac * 2) # We want to allocate more validation data for HPO to avoid overfitting - - return holdout_frac - -def test_ag_text_predictor(): - if sys.version < "3.7": - # do not test on python3.6 - return - - seed = 123 - metric = "roc_auc" - problem_type = "binary" - train_data = { - "sentence1": [ - 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', - "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", - "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", - "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", - "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", - "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", - "The DVD-CCA then appealed to the state Supreme Court .", - ], - "sentence2": [ - 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', - "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", - "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", - "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", - "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", - "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", - "The DVD CCA appealed that decision to the U.S. Supreme Court .", - ], - "numerical1":[1, 2, 3, 4, 5, 6, 7, 8], - "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], - "label": [1, 0, 1, 0, 1, 1, 0, 1], - "idx": [0, 1, 2, 3, 4, 5, 6, 7], - } - train_dataset = pd.DataFrame(train_data) - - test_data = { - "sentence1": [ - "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", - "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", - "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", - "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", - ], - "sentence2": [ - "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", - "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", - "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", - "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", - ], - "numerical1":[3, 4, 5, 6], - "categorical1": ["b", "a", "a", "b"], - "label": [0, 1, 1, 0], - "idx": [8, 10, 11, 12], - } - test_dataset = pd.DataFrame(test_data) - - # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR - holdout_frac = default_holdout_frac(len(train_dataset), False) - - _, valid_dataset = train_test_split(train_dataset, - test_size=holdout_frac, - random_state=np.random.RandomState(seed)) - - feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] - - automl = AutoML() - automl_settings = { - "gpu_per_trial": 0, - "max_iter": 3, - "time_budget": 50, - "task": "binary", - "metric": "roc_auc", - } - - automl_settings["custom_fix_args"] = { - "output_dir": "test/data/output/", - "text_backbone": "electra_base", - "multimodal_fusion_strategy": "fuse_late", - "dataset_name": "test_ag", - "label_column": "label", - "per_device_batch_size": 4, - } - - try: - automl.fit( - dataframe=train_dataset[feature_columns+["label"]], - label="label", - train_data=train_dataset[feature_columns+["label"]], - valid_data=valid_dataset[feature_columns+["label"]], - X_val=valid_dataset[feature_columns], - y_val=valid_dataset["label"], - estimator_list=["agtextpredictor"], - **automl_settings - ) - except requests.exceptions.HTTPError: - return - - print("Begin to run inference on test set") - save_dir = automl_settings["custom_fix_args"]["output_dir"] - score = automl.model.estimator.evaluate(test_dataset) - print(f"Inference on test set complete, {metric}: {score}") - - -if __name__ == "__main__": - test_ag_text_predictor() \ No newline at end of file From fe0ecbb868f29b6aaea61ef8c5949d53281c2b66 Mon Sep 17 00:00:00 2001 From: Qiaochu Song Date: Wed, 16 Mar 2022 11:48:33 -0400 Subject: [PATCH 07/50] Update automl.py --- flaml/automl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaml/automl.py b/flaml/automl.py index cde608a942..8b53c70fd8 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -462,7 +462,7 @@ def custom_metric( def custom_metric( X_val, y_val, estimator, labels, X_train, y_train, weight_val=None, weight_train=None, - *args, + **args, ): from sklearn.metrics import log_loss import time From 4a52ac7397bc493cdc05c82697628844eaa531df Mon Sep 17 00:00:00 2001 From: Qiaochu Song Date: Wed, 16 Mar 2022 11:53:34 -0400 Subject: [PATCH 08/50] Update automl.py --- flaml/automl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaml/automl.py b/flaml/automl.py index 8b53c70fd8..cde608a942 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -462,7 +462,7 @@ def custom_metric( def custom_metric( X_val, y_val, estimator, labels, X_train, y_train, weight_val=None, weight_train=None, - **args, + *args, ): from sklearn.metrics import log_loss import time From 30cc834c50bc13a9ec38228a85788af76c0c3958 Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 15:01:40 -0400 Subject: [PATCH 09/50] add test with gc, narrow down mxnet version --- flaml/model.py | 81 ++++++++++----------- setup.py | 2 +- test/test_agtextpredictor.py | 132 +++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 40 deletions(-) create mode 100644 test/test_agtextpredictor.py diff --git a/flaml/model.py b/flaml/model.py index 54c2abf472..f8ed1aade3 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -1977,26 +1977,28 @@ def __init__(self, task="binary", **params,): @classmethod def search_space(cls, **params): - # Add the possible search space configs here, e.g. 'optimization.lr' - # reference: - # https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values + """ + Add the possible search space configs here, e.g. 'optimization.lr' + reference: + https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values + """ search_space_dict = { 'model.network.agg_net.mid_units': { - "domain": tune.choice(list(range(32, 129))), - "init_value": 128 - }, + "domain": tune.choice(list(range(32, 129))), + "init_value": 128, + }, 'optimization.lr': { - "domain": tune.loguniform(lower=1E-5, upper=1E-4), - "init_value": 1E-4, - }, - 'optimization.wd':{ - "domain": tune.choice([1E-4, 1E-3, 1E-2]), - "init_value":1E-4 - }, + "domain": tune.loguniform(lower=1E-5, upper=1E-4), + "init_value": 1E-4, + }, + 'optimization.wd': { + "domain": tune.choice([1E-4, 1E-3, 1E-2]), + "init_value":1E-4, + }, 'optimization.warmup_portion': { - "domain": tune.choice([0.1, 0.2]), - "init_value":0.1, - }, + "domain": tune.choice([0.1, 0.2]), + "init_value":0.1, + }, } return search_space_dict @@ -2006,7 +2008,7 @@ def _init_fix_args(self, automl_fit_kwargs: dict=None): this includes: "output_dir", "text_backbone": "electra_base" - "multimodal_fusion_strategy":"fuse_late", + "multimodal_fusion_strategy":"fuse_late", """ fix_args = {} FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size", @@ -2016,7 +2018,7 @@ def _init_fix_args(self, automl_fit_kwargs: dict=None): key in FIX_ARGS_LIST ), "The specified key {} is not in the argument list: output_dir, label_column, dataset_name, text_backbone,\ multimodal_fusion_strategy".format(key) - + fix_args[key] = value self.fix_args = fix_args @@ -2033,15 +2035,15 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str): cfg = ag_text_presets.create(base_key) # NOTE: if the search_space() is modified, add new items or delete here too. TUNABLE_HP = set(['model.network.agg_net.mid_units', - 'optimization.batch_size', - 'optimization.layerwise_lr_decay', - 'optimization.lr', - 'optimization.nbest', - 'optimization.num_train_epochs', - 'optimization.per_device_batch_size', - 'optimization.wd', - 'optimization.warmup_portion', - ]) + 'optimization.batch_size', + 'optimization.layerwise_lr_decay', + 'optimization.lr', + 'optimization.nbest', + 'optimization.num_train_epochs', + 'optimization.per_device_batch_size', + 'optimization.wd', + 'optimization.warmup_portion', + ]) search_space = cfg['models']['MultimodalTextModel']['search_space'] for key, value in self.params.items(): if key in TUNABLE_HP: @@ -2052,7 +2054,7 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str): search_space[key] = value search_space['optimization.per_device_batch_size'] = self.fix_args['per_device_batch_size'] return cfg - + def _set_seed(self, seed): import random import mxnet as mx @@ -2068,10 +2070,10 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): # the seed set in the bash script for ag experiment is 123 seed = self.params.get("seed", 123) self._set_seed(seed) - + # get backbone and fusion strategy - text_backbone=self.fix_args["text_backbone"] - multimodal_fusion_strategy=self.fix_args["multimodal_fusion_strategy"] + text_backbone = self.fix_args["text_backbone"] + multimodal_fusion_strategy = self.fix_args["multimodal_fusion_strategy"] # get & set the save dir, get the dataset info save_dir = self.fix_args["output_dir"] @@ -2079,30 +2081,30 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): dataset_name = self.fix_args["dataset_name"] ag_model_save_dir = os.path.join(save_dir, f"{dataset_name}_ag_text_multimodal_{text_backbone}\ _{multimodal_fusion_strategy}_no_ensemble") - + # set the of the hyperparameters self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy) PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression"} TASK_METRIC_MAPPING = {"multi": "acc", "binary": "roc_auc", "regression": "r2"} - + # train the model start_time = time.time() - - self._model = self.estimator_class(path=save_dir, + + self._model = self.estimator_class(path=ag_model_save_dir, label=label_column, problem_type=PROBLEM_TYPE_MAPPING[self._task], eval_metric=TASK_METRIC_MAPPING[self._task]) - + train_data = self._kwargs["train_data"] self._model.fit(train_data=train_data, hyperparameters=self.hyperparameters, - time_limit=budget, + time_limit=budget, seed=seed) - + training_time = time.time() - start_time return training_time - + def predict(self, X): output = self._model.predict(self._kwargs["valid_data"], as_pandas=False) return output @@ -2119,6 +2121,7 @@ def predict_proba(self, X, as_multiclass=True): output = output[:, 1] return output + class suppress_stdout_stderr(object): def __init__(self): # Open a pair of null files diff --git a/setup.py b/setup.py index 0f7b19f9c5..e0712f0eaa 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ "mlflow", ], "autogluon": [ - "mxnet<2.0.0", + "mxnet>=1.7.0,<2.0.0", "autogluon.text==0.4.0", "autogluon.features==0.4.0", ], diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py new file mode 100644 index 0000000000..ccaf867075 --- /dev/null +++ b/test/test_agtextpredictor.py @@ -0,0 +1,132 @@ +from flaml import AutoML +import pandas as pd +import requests +import gc +import numpy as np +import os +import sys +from sklearn.model_selection import train_test_split +os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" + +def default_holdout_frac(num_train_rows, hyperparameter_tune=False): + """ + Returns default holdout_frac used in fit(). + Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. + Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 + """ + if num_train_rows < 5000: + holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) + else: + holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) + + if hyperparameter_tune: + holdout_frac = min(0.2, holdout_frac * 2) # We want to allocate more validation data for HPO to avoid overfitting + + return holdout_frac + +def test_ag_text_predictor(): + if sys.version < "3.7": + # do not test on python3.6 + return + + seed = 123 + metric = "roc_auc" + problem_type = "binary" + train_data = { + "sentence1": [ + 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', + "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", + "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", + "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", + "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", + "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", + "The DVD-CCA then appealed to the state Supreme Court .", + ], + "sentence2": [ + 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', + "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", + "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", + "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", + "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", + "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", + "The DVD CCA appealed that decision to the U.S. Supreme Court .", + ], + "numerical1":[1, 2, 3, 4, 5, 6, 7, 8], + "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], + "label": [1, 0, 1, 0, 1, 1, 0, 1], + "idx": [0, 1, 2, 3, 4, 5, 6, 7], + } + train_dataset = pd.DataFrame(train_data) + + test_data = { + "sentence1": [ + "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", + "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", + "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", + "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", + ], + "sentence2": [ + "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", + "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", + "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", + "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", + ], + "numerical1":[3, 4, 5, 6], + "categorical1": ["b", "a", "a", "b"], + "label": [0, 1, 1, 0], + "idx": [8, 10, 11, 12], + } + test_dataset = pd.DataFrame(test_data) + + # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR + holdout_frac = default_holdout_frac(len(train_dataset), False) + + _, valid_dataset = train_test_split(train_dataset, + test_size=holdout_frac, + random_state=np.random.RandomState(seed)) + + feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] + + automl = AutoML() + automl_settings = { + "gpu_per_trial": 0, + "max_iter": 2, + "time_budget": 20, + "task": "binary", + "metric": "roc_auc", + } + + automl_settings["custom_fix_args"] = { + "output_dir": "test/ag/output/", + "text_backbone": "electra_base", + "multimodal_fusion_strategy": "fuse_late", + "dataset_name": "test_ag", + "label_column": "label", + "per_device_batch_size": 2, + } + + try: + automl.fit( + dataframe=train_dataset[feature_columns+["label"]], + label="label", + train_data=train_dataset[feature_columns+["label"]], + valid_data=valid_dataset[feature_columns+["label"]], + X_val=valid_dataset[feature_columns], + y_val=valid_dataset["label"], + estimator_list=["agtextpredictor"], + **automl_settings + ) + except requests.exceptions.HTTPError: + return + + print("Begin to run inference on test set") + score = automl.model.estimator.evaluate(test_dataset) + print(f"Inference on test set complete, {metric}: {score}") + del automl + gc.collect() + + +if __name__ == "__main__": + test_ag_text_predictor() \ No newline at end of file From 6b75a73270cae10cc96be67af183a1c10607e105 Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 16:17:12 -0400 Subject: [PATCH 10/50] skip test for py3.6 and win+py3.8, loose mxnet ver --- setup.py | 2 +- test/test_agtextpredictor.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e0712f0eaa..0f7b19f9c5 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ "mlflow", ], "autogluon": [ - "mxnet>=1.7.0,<2.0.0", + "mxnet<2.0.0", "autogluon.text==0.4.0", "autogluon.features==0.4.0", ], diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py index ccaf867075..d1801a10ee 100644 --- a/test/test_agtextpredictor.py +++ b/test/test_agtextpredictor.py @@ -5,6 +5,7 @@ import numpy as np import os import sys +import platform from sklearn.model_selection import train_test_split os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" @@ -28,6 +29,9 @@ def test_ag_text_predictor(): if sys.version < "3.7": # do not test on python3.6 return + elif platform.system() == "Windows" and sys.version_info.major == 3 and sys.version_info.minor == 8: + # do not test on windows with py3.8 + return seed = 123 metric = "roc_auc" From d10945e785f168a50b9e55c86e2c60f8efb116dd Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 17:32:05 -0400 Subject: [PATCH 11/50] no ag on windows, remove mlflow dependency --- .github/workflows/python-package.yml | 4 ++-- flaml/model.py | 36 +++++++++++++++------------- setup.py | 1 - test/test_agtextpredictor.py | 4 +++- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 3404d1824b..3179338ffc 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -47,8 +47,8 @@ jobs: run: | pip install -e .[ray,forecast] pip install 'tensorboardX<=2.2' - - name: If python version > 3.6, install autogluon - if: matrix.python-version >= '3.7' + - name: If python version > 3.6 and not on windows, install autogluon + if: matrix.python-version >= '3.7' && (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') run: | pip install -e .[autogluon] - name: Lint with flake8 diff --git a/flaml/model.py b/flaml/model.py index f8ed1aade3..f383b8a00d 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -1983,19 +1983,19 @@ def search_space(cls, **params): https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values """ search_space_dict = { - 'model.network.agg_net.mid_units': { + "model.network.agg_net.mid_units": { "domain": tune.choice(list(range(32, 129))), "init_value": 128, }, - 'optimization.lr': { + "optimization.lr": { "domain": tune.loguniform(lower=1E-5, upper=1E-4), "init_value": 1E-4, }, - 'optimization.wd': { + "optimization.wd": { "domain": tune.choice([1E-4, 1E-3, 1E-2]), "init_value":1E-4, }, - 'optimization.warmup_portion': { + "optimization.warmup_portion": { "domain": tune.choice([0.1, 0.2]), "init_value":0.1, }, @@ -2012,7 +2012,7 @@ def _init_fix_args(self, automl_fit_kwargs: dict=None): """ fix_args = {} FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size", - "text_backbone", "multimodal_fusion_strategy", ] + "text_backbone", "multimodal_fusion_strategy", "num_train_epochs", "batch_size"] for key, value in automl_fit_kwargs["custom_fix_args"].items(): assert ( key in FIX_ARGS_LIST @@ -2034,17 +2034,20 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str): base_key = f'{text_backbone}_{multimodal_fusion_strategy}' cfg = ag_text_presets.create(base_key) # NOTE: if the search_space() is modified, add new items or delete here too. - TUNABLE_HP = set(['model.network.agg_net.mid_units', - 'optimization.batch_size', - 'optimization.layerwise_lr_decay', - 'optimization.lr', - 'optimization.nbest', - 'optimization.num_train_epochs', - 'optimization.per_device_batch_size', - 'optimization.wd', - 'optimization.warmup_portion', + TUNABLE_HP = set(["model.network.agg_net.mid_units", + "optimization.batch_size", + "optimization.layerwise_lr_decay", + "optimization.lr", + "optimization.nbest", + "optimization.num_train_epochs", + "optimization.per_device_batch_size", + "optimization.wd", + "optimization.warmup_portion", ]) - search_space = cfg['models']['MultimodalTextModel']['search_space'] + search_space = cfg["models"]["MultimodalTextModel"]["search_space"] + search_space["optimization.per_device_batch_size"] = self.fix_args.get("per_device_batch_size", 4) + search_space["optimization.num_train_epochs"] = self.fix_args.get("num_train_epochs", 10) + search_space["optimization.batch_size"] = self.fix_args.get("batch_size", 128) for key, value in self.params.items(): if key in TUNABLE_HP: # NOTE: FLAML uses np.float64 but AG uses float, need to transform @@ -2052,7 +2055,8 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str): search_space[key] = value.item() else: search_space[key] = value - search_space['optimization.per_device_batch_size'] = self.fix_args['per_device_batch_size'] + + return cfg def _set_seed(self, seed): diff --git a/setup.py b/setup.py index 0f7b19f9c5..24a622b01f 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,6 @@ "rouge_score", "hcrystalball==0.1.10", "seqeval", - "mlflow", ], "autogluon": [ "mxnet<2.0.0", diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py index d1801a10ee..4ae8293d91 100644 --- a/test/test_agtextpredictor.py +++ b/test/test_agtextpredictor.py @@ -108,7 +108,9 @@ def test_ag_text_predictor(): "multimodal_fusion_strategy": "fuse_late", "dataset_name": "test_ag", "label_column": "label", - "per_device_batch_size": 2, + "per_device_batch_size": 4, + "num_train_epochs": 2, + "batch_size": 4, } try: From 06f64b267f0f1c2e2423c468bb2d08f68bc4d2e9 Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 17:32:50 -0400 Subject: [PATCH 12/50] no ag on windows, remove mlflow dependency --- test/test_agtextpredictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py index 4ae8293d91..61d6886c4a 100644 --- a/test/test_agtextpredictor.py +++ b/test/test_agtextpredictor.py @@ -29,7 +29,7 @@ def test_ag_text_predictor(): if sys.version < "3.7": # do not test on python3.6 return - elif platform.system() == "Windows" and sys.version_info.major == 3 and sys.version_info.minor == 8: + elif platform.system() == "Windows": # do not test on windows with py3.8 return From c9ff3d448fadf34ad3264bbb4b31442e6627ebd9 Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 20:17:15 -0400 Subject: [PATCH 13/50] test with direct return --- flaml/model.py | 6 ++---- test/test_agtextpredictor.py | 32 +++++++++++++++++++------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/flaml/model.py b/flaml/model.py index f383b8a00d..6d65e69227 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2055,8 +2055,6 @@ def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str): search_space[key] = value.item() else: search_space[key] = value - - return cfg def _set_seed(self, seed): @@ -2086,12 +2084,12 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): ag_model_save_dir = os.path.join(save_dir, f"{dataset_name}_ag_text_multimodal_{text_backbone}\ _{multimodal_fusion_strategy}_no_ensemble") - # set the of the hyperparameters + # set the hyperparameters self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy) PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression"} TASK_METRIC_MAPPING = {"multi": "acc", "binary": "roc_auc", "regression": "r2"} - # train the model + # train the model start_time = time.time() self._model = self.estimator_class(path=ag_model_save_dir, diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py index 61d6886c4a..ee54e4e878 100644 --- a/test/test_agtextpredictor.py +++ b/test/test_agtextpredictor.py @@ -9,6 +9,7 @@ from sklearn.model_selection import train_test_split os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" + def default_holdout_frac(num_train_rows, hyperparameter_tune=False): """ Returns default holdout_frac used in fit(). @@ -21,11 +22,15 @@ def default_holdout_frac(num_train_rows, hyperparameter_tune=False): holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) if hyperparameter_tune: - holdout_frac = min(0.2, holdout_frac * 2) # We want to allocate more validation data for HPO to avoid overfitting + holdout_frac = min(0.2, holdout_frac * 2) # to allocate more validation data for HPO to avoid overfitting return holdout_frac - + + def test_ag_text_predictor(): + # DEBUGGING + return + # DEBUGGING if sys.version < "3.7": # do not test on python3.6 return @@ -57,7 +62,7 @@ def test_ag_text_predictor(): "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", "The DVD CCA appealed that decision to the U.S. Supreme Court .", ], - "numerical1":[1, 2, 3, 4, 5, 6, 7, 8], + "numerical1": [1, 2, 3, 4, 5, 6, 7, 8], "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], "label": [1, 0, 1, 0, 1, 1, 0, 1], "idx": [0, 1, 2, 3, 4, 5, 6, 7], @@ -77,7 +82,7 @@ def test_ag_text_predictor(): "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", ], - "numerical1":[3, 4, 5, 6], + "numerical1": [3, 4, 5, 6], "categorical1": ["b", "a", "a", "b"], "label": [0, 1, 1, 0], "idx": [8, 10, 11, 12], @@ -90,29 +95,29 @@ def test_ag_text_predictor(): _, valid_dataset = train_test_split(train_dataset, test_size=holdout_frac, random_state=np.random.RandomState(seed)) - + feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] - + automl = AutoML() automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 20, + "time_budget": 50, "task": "binary", - "metric": "roc_auc", + "metric": "roc_auc", } - automl_settings["custom_fix_args"] = { + automl_settings["custom_fix_args"] = { "output_dir": "test/ag/output/", "text_backbone": "electra_base", - "multimodal_fusion_strategy": "fuse_late", - "dataset_name": "test_ag", + "multimodal_fusion_strategy": "fuse_late", + "dataset_name": "test_ag", "label_column": "label", "per_device_batch_size": 4, "num_train_epochs": 2, "batch_size": 4, } - + try: automl.fit( dataframe=train_dataset[feature_columns+["label"]], @@ -126,11 +131,12 @@ def test_ag_text_predictor(): ) except requests.exceptions.HTTPError: return - + print("Begin to run inference on test set") score = automl.model.estimator.evaluate(test_dataset) print(f"Inference on test set complete, {metric}: {score}") del automl + # del mx gc.collect() From e7b6f6d1da32fd67c462f3b123b62a943bd2ccfe Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 20:40:32 -0400 Subject: [PATCH 14/50] debug without new test --- test/test_agtextpredictor.py | 144 ----------------------------------- 1 file changed, 144 deletions(-) delete mode 100644 test/test_agtextpredictor.py diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py deleted file mode 100644 index ee54e4e878..0000000000 --- a/test/test_agtextpredictor.py +++ /dev/null @@ -1,144 +0,0 @@ -from flaml import AutoML -import pandas as pd -import requests -import gc -import numpy as np -import os -import sys -import platform -from sklearn.model_selection import train_test_split -os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" - - -def default_holdout_frac(num_train_rows, hyperparameter_tune=False): - """ - Returns default holdout_frac used in fit(). - Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. - Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 - """ - if num_train_rows < 5000: - holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) - else: - holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) - - if hyperparameter_tune: - holdout_frac = min(0.2, holdout_frac * 2) # to allocate more validation data for HPO to avoid overfitting - - return holdout_frac - - -def test_ag_text_predictor(): - # DEBUGGING - return - # DEBUGGING - if sys.version < "3.7": - # do not test on python3.6 - return - elif platform.system() == "Windows": - # do not test on windows with py3.8 - return - - seed = 123 - metric = "roc_auc" - problem_type = "binary" - train_data = { - "sentence1": [ - 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', - "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", - "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", - "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", - "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", - "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", - "The DVD-CCA then appealed to the state Supreme Court .", - ], - "sentence2": [ - 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', - "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", - "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", - "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", - "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", - "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", - "The DVD CCA appealed that decision to the U.S. Supreme Court .", - ], - "numerical1": [1, 2, 3, 4, 5, 6, 7, 8], - "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], - "label": [1, 0, 1, 0, 1, 1, 0, 1], - "idx": [0, 1, 2, 3, 4, 5, 6, 7], - } - train_dataset = pd.DataFrame(train_data) - - test_data = { - "sentence1": [ - "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", - "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", - "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", - "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", - ], - "sentence2": [ - "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", - "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", - "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", - "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", - ], - "numerical1": [3, 4, 5, 6], - "categorical1": ["b", "a", "a", "b"], - "label": [0, 1, 1, 0], - "idx": [8, 10, 11, 12], - } - test_dataset = pd.DataFrame(test_data) - - # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR - holdout_frac = default_holdout_frac(len(train_dataset), False) - - _, valid_dataset = train_test_split(train_dataset, - test_size=holdout_frac, - random_state=np.random.RandomState(seed)) - - feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] - - automl = AutoML() - automl_settings = { - "gpu_per_trial": 0, - "max_iter": 2, - "time_budget": 50, - "task": "binary", - "metric": "roc_auc", - } - - automl_settings["custom_fix_args"] = { - "output_dir": "test/ag/output/", - "text_backbone": "electra_base", - "multimodal_fusion_strategy": "fuse_late", - "dataset_name": "test_ag", - "label_column": "label", - "per_device_batch_size": 4, - "num_train_epochs": 2, - "batch_size": 4, - } - - try: - automl.fit( - dataframe=train_dataset[feature_columns+["label"]], - label="label", - train_data=train_dataset[feature_columns+["label"]], - valid_data=valid_dataset[feature_columns+["label"]], - X_val=valid_dataset[feature_columns], - y_val=valid_dataset["label"], - estimator_list=["agtextpredictor"], - **automl_settings - ) - except requests.exceptions.HTTPError: - return - - print("Begin to run inference on test set") - score = automl.model.estimator.evaluate(test_dataset) - print(f"Inference on test set complete, {metric}: {score}") - del automl - # del mx - gc.collect() - - -if __name__ == "__main__": - test_ag_text_predictor() \ No newline at end of file From 2307b3737c95dcce7bd0b7fd958af925dfe8983b Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 21:08:59 -0400 Subject: [PATCH 15/50] w/o os.environ setting in new test, direct return --- test/test_agtextpredictor.py | 143 +++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 test/test_agtextpredictor.py diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py new file mode 100644 index 0000000000..89348141ae --- /dev/null +++ b/test/test_agtextpredictor.py @@ -0,0 +1,143 @@ +from flaml import AutoML +import pandas as pd +import requests +import gc +import numpy as np +import os +import sys +import platform +from sklearn.model_selection import train_test_split +# os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" + + +def default_holdout_frac(num_train_rows, hyperparameter_tune=False): + """ + Returns default holdout_frac used in fit(). + Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. + Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 + """ + if num_train_rows < 5000: + holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) + else: + holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) + + if hyperparameter_tune: + holdout_frac = min(0.2, holdout_frac * 2) # to allocate more validation data for HPO to avoid overfitting + + return holdout_frac + + +def test_ag_text_predictor(): + # DEBUG + return + # DEBUG + if sys.version < "3.7": + # do not test on python3.6 + return + elif platform.system() == "Windows": + # do not test on windows with py3.8 + return + + seed = 123 + metric = "roc_auc" + train_data = { + "sentence1": [ + 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', + "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", + "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", + "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", + "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", + "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", + "The DVD-CCA then appealed to the state Supreme Court .", + ], + "sentence2": [ + 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', + "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", + "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", + "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", + "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", + "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", + "The DVD CCA appealed that decision to the U.S. Supreme Court .", + ], + "numerical1": [1, 2, 3, 4, 5, 6, 7, 8], + "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], + "label": [1, 0, 1, 0, 1, 1, 0, 1], + "idx": [0, 1, 2, 3, 4, 5, 6, 7], + } + train_dataset = pd.DataFrame(train_data) + + test_data = { + "sentence1": [ + "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", + "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", + "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", + "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", + ], + "sentence2": [ + "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", + "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", + "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", + "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", + ], + "numerical1": [3, 4, 5, 6], + "categorical1": ["b", "a", "a", "b"], + "label": [0, 1, 1, 0], + "idx": [8, 10, 11, 12], + } + test_dataset = pd.DataFrame(test_data) + + # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR + holdout_frac = default_holdout_frac(len(train_dataset), False) + + _, valid_dataset = train_test_split(train_dataset, + test_size=holdout_frac, + random_state=np.random.RandomState(seed)) + + feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] + + automl = AutoML() + automl_settings = { + "gpu_per_trial": 0, + "max_iter": 2, + "time_budget": 50, + "task": "binary", + "metric": "roc_auc", + } + + automl_settings["custom_fix_args"] = { + "output_dir": "test/ag/output/", + "text_backbone": "electra_base", + "multimodal_fusion_strategy": "fuse_late", + "dataset_name": "test_ag", + "label_column": "label", + "per_device_batch_size": 4, + "num_train_epochs": 2, + "batch_size": 4, + } + + try: + automl.fit( + dataframe=train_dataset[feature_columns+["label"]], + label="label", + train_data=train_dataset[feature_columns+["label"]], + valid_data=valid_dataset[feature_columns+["label"]], + X_val=valid_dataset[feature_columns], + y_val=valid_dataset["label"], + estimator_list=["agtextpredictor"], + **automl_settings + ) + except requests.exceptions.HTTPError: + return + + print("Begin to run inference on test set") + score = automl.model.estimator.evaluate(test_dataset) + print(f"Inference on test set complete, {metric}: {score}") + del automl + # del mx + gc.collect() + + +if __name__ == "__main__": + test_ag_text_predictor() \ No newline at end of file From bf3203b93ffcf73a111e26c9b33f2870442ff74a Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 21:32:43 -0400 Subject: [PATCH 16/50] debug, import only in new test --- test/test_agtextpredictor.py | 242 +++++++++++++++++------------------ 1 file changed, 119 insertions(+), 123 deletions(-) diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py index 89348141ae..f0cf02c8ba 100644 --- a/test/test_agtextpredictor.py +++ b/test/test_agtextpredictor.py @@ -10,134 +10,130 @@ # os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" -def default_holdout_frac(num_train_rows, hyperparameter_tune=False): - """ - Returns default holdout_frac used in fit(). - Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. - Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 - """ - if num_train_rows < 5000: - holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) - else: - holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) +# def default_holdout_frac(num_train_rows, hyperparameter_tune=False): +# """ +# Returns default holdout_frac used in fit(). +# Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. +# Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 +# """ +# if num_train_rows < 5000: +# holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) +# else: +# holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) - if hyperparameter_tune: - holdout_frac = min(0.2, holdout_frac * 2) # to allocate more validation data for HPO to avoid overfitting +# if hyperparameter_tune: +# holdout_frac = min(0.2, holdout_frac * 2) # to allocate more validation data for HPO to avoid overfitting - return holdout_frac +# return holdout_frac def test_ag_text_predictor(): # DEBUG return # DEBUG - if sys.version < "3.7": - # do not test on python3.6 - return - elif platform.system() == "Windows": - # do not test on windows with py3.8 - return - - seed = 123 - metric = "roc_auc" - train_data = { - "sentence1": [ - 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', - "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", - "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", - "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", - "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", - "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", - "The DVD-CCA then appealed to the state Supreme Court .", - ], - "sentence2": [ - 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', - "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", - "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", - "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", - "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", - "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", - "The DVD CCA appealed that decision to the U.S. Supreme Court .", - ], - "numerical1": [1, 2, 3, 4, 5, 6, 7, 8], - "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], - "label": [1, 0, 1, 0, 1, 1, 0, 1], - "idx": [0, 1, 2, 3, 4, 5, 6, 7], - } - train_dataset = pd.DataFrame(train_data) - - test_data = { - "sentence1": [ - "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", - "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", - "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", - "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", - ], - "sentence2": [ - "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", - "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", - "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", - "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", - ], - "numerical1": [3, 4, 5, 6], - "categorical1": ["b", "a", "a", "b"], - "label": [0, 1, 1, 0], - "idx": [8, 10, 11, 12], - } - test_dataset = pd.DataFrame(test_data) - - # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR - holdout_frac = default_holdout_frac(len(train_dataset), False) - - _, valid_dataset = train_test_split(train_dataset, - test_size=holdout_frac, - random_state=np.random.RandomState(seed)) - - feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] - - automl = AutoML() - automl_settings = { - "gpu_per_trial": 0, - "max_iter": 2, - "time_budget": 50, - "task": "binary", - "metric": "roc_auc", - } - - automl_settings["custom_fix_args"] = { - "output_dir": "test/ag/output/", - "text_backbone": "electra_base", - "multimodal_fusion_strategy": "fuse_late", - "dataset_name": "test_ag", - "label_column": "label", - "per_device_batch_size": 4, - "num_train_epochs": 2, - "batch_size": 4, - } - - try: - automl.fit( - dataframe=train_dataset[feature_columns+["label"]], - label="label", - train_data=train_dataset[feature_columns+["label"]], - valid_data=valid_dataset[feature_columns+["label"]], - X_val=valid_dataset[feature_columns], - y_val=valid_dataset["label"], - estimator_list=["agtextpredictor"], - **automl_settings - ) - except requests.exceptions.HTTPError: - return - - print("Begin to run inference on test set") - score = automl.model.estimator.evaluate(test_dataset) - print(f"Inference on test set complete, {metric}: {score}") - del automl - # del mx - gc.collect() - - -if __name__ == "__main__": - test_ag_text_predictor() \ No newline at end of file + # if sys.version < "3.7": + # # do not test on python3.6 + # return + # elif platform.system() == "Windows": + # # do not test on windows with py3.8 + # return + + # seed = 123 + # metric = "roc_auc" + # train_data = { + # "sentence1": [ + # 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', + # "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", + # "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", + # "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", + # "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", + # "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", + # "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", + # "The DVD-CCA then appealed to the state Supreme Court .", + # ], + # "sentence2": [ + # 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', + # "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", + # "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", + # "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", + # "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", + # "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", + # "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", + # "The DVD CCA appealed that decision to the U.S. Supreme Court .", + # ], + # "numerical1": [1, 2, 3, 4, 5, 6, 7, 8], + # "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], + # "label": [1, 0, 1, 0, 1, 1, 0, 1], + # "idx": [0, 1, 2, 3, 4, 5, 6, 7], + # } + # train_dataset = pd.DataFrame(train_data) + + # test_data = { + # "sentence1": [ + # "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", + # "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", + # "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", + # "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", + # ], + # "sentence2": [ + # "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", + # "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", + # "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", + # "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", + # ], + # "numerical1": [3, 4, 5, 6], + # "categorical1": ["b", "a", "a", "b"], + # "label": [0, 1, 1, 0], + # "idx": [8, 10, 11, 12], + # } + # test_dataset = pd.DataFrame(test_data) + + # # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR + # holdout_frac = default_holdout_frac(len(train_dataset), False) + + # _, valid_dataset = train_test_split(train_dataset, + # test_size=holdout_frac, + # random_state=np.random.RandomState(seed)) + + # feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] + + # automl = AutoML() + # automl_settings = { + # "gpu_per_trial": 0, + # "max_iter": 2, + # "time_budget": 50, + # "task": "binary", + # "metric": "roc_auc", + # } + + # automl_settings["custom_fix_args"] = { + # "output_dir": "test/ag/output/", + # "text_backbone": "electra_base", + # "multimodal_fusion_strategy": "fuse_late", + # "dataset_name": "test_ag", + # "label_column": "label", + # "per_device_batch_size": 4, + # "num_train_epochs": 2, + # "batch_size": 4, + # } + + # try: + # automl.fit( + # dataframe=train_dataset[feature_columns+["label"]], + # label="label", + # train_data=train_dataset[feature_columns+["label"]], + # valid_data=valid_dataset[feature_columns+["label"]], + # X_val=valid_dataset[feature_columns], + # y_val=valid_dataset["label"], + # estimator_list=["agtextpredictor"], + # **automl_settings + # ) + # except requests.exceptions.HTTPError: + # return + + # print("Begin to run inference on test set") + # score = automl.model.estimator.evaluate(test_dataset) + # print(f"Inference on test set complete, {metric}: {score}") + # del automl + # # del mx + # gc.collect() \ No newline at end of file From 10c93b28cf2fb23555791d56c2e4fcd87ac41ed7 Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 16 Mar 2022 23:42:33 -0400 Subject: [PATCH 17/50] move new test to automl --- test/automl/test_agtextpredictor.py | 135 +++++++++++++++++++++++++++ test/test_agtextpredictor.py | 139 ---------------------------- 2 files changed, 135 insertions(+), 139 deletions(-) create mode 100644 test/automl/test_agtextpredictor.py delete mode 100644 test/test_agtextpredictor.py diff --git a/test/automl/test_agtextpredictor.py b/test/automl/test_agtextpredictor.py new file mode 100644 index 0000000000..5f198db20b --- /dev/null +++ b/test/automl/test_agtextpredictor.py @@ -0,0 +1,135 @@ +from flaml import AutoML +import pandas as pd +import requests +import gc +import numpy as np +import os +import sys +import platform +from sklearn.model_selection import train_test_split +os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" + + +def default_holdout_frac(num_train_rows, hyperparameter_tune=False): + """ + Returns default holdout_frac used in fit(). + Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. + Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 + """ + if num_train_rows < 5000: + holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) + else: + holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) + + if hyperparameter_tune: + holdout_frac = min(0.2, holdout_frac * 2) # to allocate more validation data for HPO to avoid overfitting + + return holdout_frac + + +def test_ag_text_predictor(): + # # DEBUG + # return + # # DEBUG + if sys.version < "3.7": + # do not test on python3.6 + return + elif platform.system() == "Windows": + # do not test on windows with py3.8 + return + + seed = 123 + metric = "roc_auc" + train_data = { + "sentence1": [ + 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', + "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", + "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", + "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", + "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", + "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", + "The DVD-CCA then appealed to the state Supreme Court .", + ], + "sentence2": [ + 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', + "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", + "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", + "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", + "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", + "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", + "The DVD CCA appealed that decision to the U.S. Supreme Court .", + ], + "numerical1": [1, 2, 3, 4, 5, 6, 7, 8], + "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], + "label": [1, 0, 1, 0, 1, 1, 0, 1], + "idx": [0, 1, 2, 3, 4, 5, 6, 7], + } + train_dataset = pd.DataFrame(train_data) + + test_data = { + "sentence1": [ + "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", + "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", + "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", + "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", + ], + "sentence2": [ + "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", + "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", + "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", + "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", + ], + "numerical1": [3, 4, 5, 6], + "categorical1": ["b", "a", "a", "b"], + "label": [0, 1, 1, 0], + "idx": [8, 10, 11, 12], + } + test_dataset = pd.DataFrame(test_data) + + # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR + holdout_frac = default_holdout_frac(len(train_dataset), False) + + _, valid_dataset = train_test_split(train_dataset, + test_size=holdout_frac, + random_state=np.random.RandomState(seed)) + + feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] + + automl = AutoML() + automl_settings = { + "gpu_per_trial": 0, + "max_iter": 2, + "time_budget": 50, + "task": "binary", + "metric": "roc_auc", + } + + automl_settings["custom_fix_args"] = { + "output_dir": "test/ag/output/", + "text_backbone": "electra_base", + "multimodal_fusion_strategy": "fuse_late", + "dataset_name": "test_ag", + "label_column": "label", + "per_device_batch_size": 4, + "num_train_epochs": 2, + "batch_size": 4, + } + + automl.fit( + dataframe=train_dataset[feature_columns+["label"]], + label="label", + train_data=train_dataset[feature_columns+["label"]], + valid_data=valid_dataset[feature_columns+["label"]], + X_val=valid_dataset[feature_columns], + y_val=valid_dataset["label"], + estimator_list=["agtextpredictor"], + **automl_settings + ) + + print("Begin to run inference on test set") + score = automl.model.estimator.evaluate(test_dataset) + print(f"Inference on test set complete, {metric}: {score}") + del automl + gc.collect() \ No newline at end of file diff --git a/test/test_agtextpredictor.py b/test/test_agtextpredictor.py deleted file mode 100644 index f0cf02c8ba..0000000000 --- a/test/test_agtextpredictor.py +++ /dev/null @@ -1,139 +0,0 @@ -from flaml import AutoML -import pandas as pd -import requests -import gc -import numpy as np -import os -import sys -import platform -from sklearn.model_selection import train_test_split -# os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" - - -# def default_holdout_frac(num_train_rows, hyperparameter_tune=False): -# """ -# Returns default holdout_frac used in fit(). -# Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. -# Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 -# """ -# if num_train_rows < 5000: -# holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) -# else: -# holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) - -# if hyperparameter_tune: -# holdout_frac = min(0.2, holdout_frac * 2) # to allocate more validation data for HPO to avoid overfitting - -# return holdout_frac - - -def test_ag_text_predictor(): - # DEBUG - return - # DEBUG - # if sys.version < "3.7": - # # do not test on python3.6 - # return - # elif platform.system() == "Windows": - # # do not test on windows with py3.8 - # return - - # seed = 123 - # metric = "roc_auc" - # train_data = { - # "sentence1": [ - # 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', - # "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", - # "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", - # "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", - # "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", - # "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", - # "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", - # "The DVD-CCA then appealed to the state Supreme Court .", - # ], - # "sentence2": [ - # 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', - # "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", - # "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", - # "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", - # "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", - # "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", - # "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", - # "The DVD CCA appealed that decision to the U.S. Supreme Court .", - # ], - # "numerical1": [1, 2, 3, 4, 5, 6, 7, 8], - # "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], - # "label": [1, 0, 1, 0, 1, 1, 0, 1], - # "idx": [0, 1, 2, 3, 4, 5, 6, 7], - # } - # train_dataset = pd.DataFrame(train_data) - - # test_data = { - # "sentence1": [ - # "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", - # "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", - # "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", - # "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", - # ], - # "sentence2": [ - # "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", - # "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", - # "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", - # "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", - # ], - # "numerical1": [3, 4, 5, 6], - # "categorical1": ["b", "a", "a", "b"], - # "label": [0, 1, 1, 0], - # "idx": [8, 10, 11, 12], - # } - # test_dataset = pd.DataFrame(test_data) - - # # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR - # holdout_frac = default_holdout_frac(len(train_dataset), False) - - # _, valid_dataset = train_test_split(train_dataset, - # test_size=holdout_frac, - # random_state=np.random.RandomState(seed)) - - # feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] - - # automl = AutoML() - # automl_settings = { - # "gpu_per_trial": 0, - # "max_iter": 2, - # "time_budget": 50, - # "task": "binary", - # "metric": "roc_auc", - # } - - # automl_settings["custom_fix_args"] = { - # "output_dir": "test/ag/output/", - # "text_backbone": "electra_base", - # "multimodal_fusion_strategy": "fuse_late", - # "dataset_name": "test_ag", - # "label_column": "label", - # "per_device_batch_size": 4, - # "num_train_epochs": 2, - # "batch_size": 4, - # } - - # try: - # automl.fit( - # dataframe=train_dataset[feature_columns+["label"]], - # label="label", - # train_data=train_dataset[feature_columns+["label"]], - # valid_data=valid_dataset[feature_columns+["label"]], - # X_val=valid_dataset[feature_columns], - # y_val=valid_dataset["label"], - # estimator_list=["agtextpredictor"], - # **automl_settings - # ) - # except requests.exceptions.HTTPError: - # return - - # print("Begin to run inference on test set") - # score = automl.model.estimator.evaluate(test_dataset) - # print(f"Inference on test set complete, {metric}: {score}") - # del automl - # # del mx - # gc.collect() \ No newline at end of file From 53b5f09f3b4c733d6952411edf30750a5e8761e1 Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 17 Mar 2022 13:54:31 -0400 Subject: [PATCH 18/50] move new test to test/nlp/ --- test/{automl => nlp}/test_agtextpredictor.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/{automl => nlp}/test_agtextpredictor.py (100%) diff --git a/test/automl/test_agtextpredictor.py b/test/nlp/test_agtextpredictor.py similarity index 100% rename from test/automl/test_agtextpredictor.py rename to test/nlp/test_agtextpredictor.py From ee3cacb67c7b957d048484074af9eef9e6b760b2 Mon Sep 17 00:00:00 2001 From: Varia Date: Mon, 21 Mar 2022 19:13:18 -0400 Subject: [PATCH 19/50] pass data with X_train --- flaml/automl.py | 4 +- flaml/data.py | 12 +++- flaml/model.py | 110 +++++++++++++++++++------------ test/nlp/test_agtextpredictor.py | 50 +++++++------- 4 files changed, 108 insertions(+), 68 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index cde608a942..69627bf346 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -46,6 +46,7 @@ REGRESSION, _is_nlp_task, NLG_TASKS, + _is_mm_task, ) from . import tune from .training_log import training_log_reader, training_log_writer @@ -974,7 +975,8 @@ def _validate_data( "or all columns of X are integer ids (tokenized)" ) - if issparse(X_train_all): + if issparse(X_train_all) or _is_mm_task(self._state.task): + # leave the preprocessing to the mm_estimator self._transformer = self._label_transformer = False self._X_train_all, self._y_train_all = X, y else: diff --git a/flaml/data.py b/flaml/data.py index 90162ff2f0..4f14582253 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -23,9 +23,11 @@ SEQCLASSIFICATION, MULTICHOICECLASSIFICATION, TOKENCLASSIFICATION, + "mm_multi", + "mm_binary", ) SEQREGRESSION = "seq-regression" -REGRESSION = ("regression", SEQREGRESSION) +REGRESSION = ("regression", "mm_regression", SEQREGRESSION) TS_FORECASTREGRESSION = ( "forecast", "ts_forecast", @@ -47,6 +49,14 @@ TOKENCLASSIFICATION, ) +MM_TASKS = ("mm_binary", "mm_multi", "mm_regression") + + + ## ***** ADDED FOR MULTIMODAL ***** +def _is_mm_task(task): + return True if task in MM_TASKS else False +## ***** END ADDED FOR MULTIMODAL ***** + def _is_nlp_task(task): if task in NLU_TASKS or task in NLG_TASKS: diff --git a/flaml/model.py b/flaml/model.py index 6d65e69227..263587c468 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -1970,10 +1970,10 @@ class AGTextPredictorEstimator(BaseEstimator): The class for tuning AutoGluon TextPredictor """ def __init__(self, task="binary", **params,): - from autogluon.text.text_prediction.mx_predictor import MXTextPredictor + from autogluon.text import TextPredictor super().__init__(task, **params) - self.estimator_class = MXTextPredictor + self.estimator_class = TextPredictor @classmethod def search_space(cls, **params): @@ -2011,51 +2011,70 @@ def _init_fix_args(self, automl_fit_kwargs: dict=None): "multimodal_fusion_strategy":"fuse_late", """ fix_args = {} - FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size", - "text_backbone", "multimodal_fusion_strategy", "num_train_epochs", "batch_size"] + FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size", "backend", + "text_backbone", "multimodal_fusion_strategy", "num_train_epochs", "batch_size",] for key, value in automl_fit_kwargs["custom_fix_args"].items(): assert ( key in FIX_ARGS_LIST - ), "The specified key {} is not in the argument list: output_dir, label_column, dataset_name, text_backbone,\ - multimodal_fusion_strategy".format(key) + ), "The specified key {} is not in the argument list: output_dir, backend, label_column, dataset_name, text_backbone,\ + multimodal_fusion_strategy, num_train_epochs, batch_size, per_device_batch_size".format(key) fix_args[key] = value self.fix_args = fix_args def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str): - """" Ref: https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values """ - from autogluon.text.text_prediction.legacy_presets import ag_text_presets - - base_key = f'{text_backbone}_{multimodal_fusion_strategy}' - cfg = ag_text_presets.create(base_key) - # NOTE: if the search_space() is modified, add new items or delete here too. - TUNABLE_HP = set(["model.network.agg_net.mid_units", - "optimization.batch_size", - "optimization.layerwise_lr_decay", - "optimization.lr", - "optimization.nbest", - "optimization.num_train_epochs", - "optimization.per_device_batch_size", - "optimization.wd", - "optimization.warmup_portion", - ]) - search_space = cfg["models"]["MultimodalTextModel"]["search_space"] - search_space["optimization.per_device_batch_size"] = self.fix_args.get("per_device_batch_size", 4) - search_space["optimization.num_train_epochs"] = self.fix_args.get("num_train_epochs", 10) - search_space["optimization.batch_size"] = self.fix_args.get("batch_size", 128) - for key, value in self.params.items(): - if key in TUNABLE_HP: - # NOTE: FLAML uses np.float64 but AG uses float, need to transform - if isinstance(value, np.float64): - search_space[key] = value.item() - else: - search_space[key] = value - return cfg + if self.fix_args.get("backend", "pytorch") == "mxnet": + from autogluon.text.text_prediction.legacy_presets import ag_text_presets + + base_key = f'{text_backbone}_{multimodal_fusion_strategy}' + cfg = ag_text_presets.create(base_key) + # NOTE: if the search_space() is modified, add new items or delete here too. + TUNABLE_HP = set(["model.network.agg_net.mid_units", + "optimization.batch_size", + "optimization.layerwise_lr_decay", + "optimization.lr", + "optimization.nbest", + "optimization.num_train_epochs", + "optimization.per_device_batch_size", + "optimization.wd", + "optimization.warmup_portion", + ]) + search_space = cfg["models"]["MultimodalTextModel"]["search_space"] + search_space["optimization.per_device_batch_size"] = self.fix_args.get("per_device_batch_size", 4) + search_space["optimization.num_train_epochs"] = self.fix_args.get("num_train_epochs", 10) + search_space["optimization.batch_size"] = self.fix_args.get("batch_size", 128) + for key, value in self.params.items(): + if key in TUNABLE_HP: + # NOTE: FLAML uses np.float64 but AG uses float, need to transform + if isinstance(value, np.float64): + search_space[key] = value.item() + else: + search_space[key] = value + return cfg + + else: + raise ValueError("the pytorch automm model is not supported. ") + # from autogluon.text.text_prediction.presets import get_text_preset + + # cfg, overrides = get_text_preset("default") # get preset for text+num+cat+fusion + # # TODO: set the search space for the auto_mm in AG 0.4.0 + # cfg.hf_text.checkpoint_name = self.fix_args["hf_text.checkpoint_name"] + # # get search configs from self.params and set here + # TUNABLE_HP = [] + # for key, value in self.params.items(): + # if key in TUNABLE_HP: + # # NOTE: FLAML uses np.float64 but AG uses float, might need to transform + # if isinstance(value, np.float64): + # search_space[key] = value.item() + # else: + # search_space[key] = value + return cfg + def _set_seed(self, seed): import random @@ -2086,8 +2105,8 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): # set the hyperparameters self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy) - PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression"} - TASK_METRIC_MAPPING = {"multi": "acc", "binary": "roc_auc", "regression": "r2"} + PROBLEM_TYPE_MAPPING = {"mm_binary": "binary", "mm_multi": "multiclass", "mm_regression": "regression"} + TASK_METRIC_MAPPING = {"mm_multi": "acc", "mm_binary": "roc_auc", "mm_regression": "r2"} # train the model start_time = time.time() @@ -2095,11 +2114,16 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): self._model = self.estimator_class(path=ag_model_save_dir, label=label_column, problem_type=PROBLEM_TYPE_MAPPING[self._task], - eval_metric=TASK_METRIC_MAPPING[self._task]) - - train_data = self._kwargs["train_data"] - + eval_metric=TASK_METRIC_MAPPING[self._task], + backend=self.fix_args.get("backend", "pytorch")) + + # train_data = self._kwargs["train_data"] + import pandas as pd + train_data = pd.concat([X_train, y_train], axis=1) + tuning_data = pd.concat([X_train, y_train], axis=1) + self._model.fit(train_data=train_data, + tuning_data=kwargs.get("tuning_data", None), hyperparameters=self.hyperparameters, time_limit=budget, seed=seed) @@ -2108,7 +2132,7 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): return training_time def predict(self, X): - output = self._model.predict(self._kwargs["valid_data"], as_pandas=False) + output = self._model.predict(X, as_pandas=False) return output def predict_proba(self, X, as_multiclass=True): @@ -2117,9 +2141,9 @@ def predict_proba(self, X, as_multiclass=True): self._task in CLASSIFICATION ), "predict_proba() only for classification tasks." - output = self._model.predict_proba(self._kwargs["valid_data"], as_pandas=False) + output = self._model.predict_proba(X, as_pandas=False) if not as_multiclass: - if self._task == "binary": + if self._task == "mm_binary": output = output[:, 1] return output diff --git a/test/nlp/test_agtextpredictor.py b/test/nlp/test_agtextpredictor.py index 5f198db20b..738610a882 100644 --- a/test/nlp/test_agtextpredictor.py +++ b/test/nlp/test_agtextpredictor.py @@ -9,7 +9,6 @@ from sklearn.model_selection import train_test_split os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" - def default_holdout_frac(num_train_rows, hyperparameter_tune=False): """ Returns default holdout_frac used in fit(). @@ -26,11 +25,7 @@ def default_holdout_frac(num_train_rows, hyperparameter_tune=False): return holdout_frac - -def test_ag_text_predictor(): - # # DEBUG - # return - # # DEBUG +def test_ag_mx_textpredictor(): if sys.version < "3.7": # do not test on python3.6 return @@ -39,7 +34,7 @@ def test_ag_text_predictor(): return seed = 123 - metric = "roc_auc" + metric = "accuracy" train_data = { "sentence1": [ 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', @@ -49,7 +44,11 @@ def test_ag_text_predictor(): "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", - "The DVD-CCA then appealed to the state Supreme Court .", + "The DVD-CCA then appealed to the state Supreme Court .", + "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", + "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", + "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", + "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", ], "sentence2": [ 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', @@ -60,11 +59,14 @@ def test_ag_text_predictor(): "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", "The DVD CCA appealed that decision to the U.S. Supreme Court .", + "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", + "The DVD-CCA then appealed to the state Supreme Court .", + "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", + "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", ], - "numerical1": [1, 2, 3, 4, 5, 6, 7, 8], - "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a"], - "label": [1, 0, 1, 0, 1, 1, 0, 1], - "idx": [0, 1, 2, 3, 4, 5, 6, 7], + "numerical1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a", "a", "b", "a", "a"], + "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2, 0, 1], } train_dataset = pd.DataFrame(train_data) @@ -83,9 +85,8 @@ def test_ag_text_predictor(): ], "numerical1": [3, 4, 5, 6], "categorical1": ["b", "a", "a", "b"], - "label": [0, 1, 1, 0], - "idx": [8, 10, 11, 12], - } + "label": [0, 1, 1, 2], + } test_dataset = pd.DataFrame(test_data) # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR @@ -94,7 +95,7 @@ def test_ag_text_predictor(): _, valid_dataset = train_test_split(train_dataset, test_size=holdout_frac, random_state=np.random.RandomState(seed)) - + feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] automl = AutoML() @@ -102,12 +103,15 @@ def test_ag_text_predictor(): "gpu_per_trial": 0, "max_iter": 2, "time_budget": 50, - "task": "binary", - "metric": "roc_auc", + "task": "mm_multi", + "metric": "accuracy", } automl_settings["custom_fix_args"] = { "output_dir": "test/ag/output/", + # "backend": "pytorch", + "backend": "mxnet", + # "hf_text.checkpoint_name": "google/electra-base-discriminator", "text_backbone": "electra_base", "multimodal_fusion_strategy": "fuse_late", "dataset_name": "test_ag", @@ -118,17 +122,17 @@ def test_ag_text_predictor(): } automl.fit( - dataframe=train_dataset[feature_columns+["label"]], - label="label", - train_data=train_dataset[feature_columns+["label"]], - valid_data=valid_dataset[feature_columns+["label"]], + X_train=train_dataset[feature_columns], + y_train=train_dataset["label"], X_val=valid_dataset[feature_columns], y_val=valid_dataset["label"], + eval_method="holdout", + auto_augment=False, estimator_list=["agtextpredictor"], **automl_settings ) - print("Begin to run inference on test set") + print("Try to run inference on test set") score = automl.model.estimator.evaluate(test_dataset) print(f"Inference on test set complete, {metric}: {score}") del automl From 8096a89ce22c66a9559d8e603f38509bc25711df Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 24 Mar 2022 16:13:42 -0400 Subject: [PATCH 20/50] pr fixes, debugging --- flaml/automl.py | 3 +- flaml/data.py | 39 +++---- flaml/ml.py | 6 +- flaml/model.py | 182 +++++++++++-------------------- flaml/nlp/utils.py | 85 +++++++++++++++ test/nlp/test_agtextpredictor.py | 23 ++-- 6 files changed, 180 insertions(+), 158 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index 69627bf346..f83dd897ba 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -46,7 +46,6 @@ REGRESSION, _is_nlp_task, NLG_TASKS, - _is_mm_task, ) from . import tune from .training_log import training_log_reader, training_log_writer @@ -975,7 +974,7 @@ def _validate_data( "or all columns of X are integer ids (tokenized)" ) - if issparse(X_train_all) or _is_mm_task(self._state.task): + if issparse(X_train_all): # leave the preprocessing to the mm_estimator self._transformer = self._label_transformer = False self._X_train_all, self._y_train_all = X, y diff --git a/flaml/data.py b/flaml/data.py index 4f14582253..30968b6012 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -23,11 +23,9 @@ SEQCLASSIFICATION, MULTICHOICECLASSIFICATION, TOKENCLASSIFICATION, - "mm_multi", - "mm_binary", ) SEQREGRESSION = "seq-regression" -REGRESSION = ("regression", "mm_regression", SEQREGRESSION) +REGRESSION = ("regression", SEQREGRESSION) TS_FORECASTREGRESSION = ( "forecast", "ts_forecast", @@ -49,14 +47,6 @@ TOKENCLASSIFICATION, ) -MM_TASKS = ("mm_binary", "mm_multi", "mm_regression") - - - ## ***** ADDED FOR MULTIMODAL ***** -def _is_mm_task(task): - return True if task in MM_TASKS else False -## ***** END ADDED FOR MULTIMODAL ***** - def _is_nlp_task(task): if task in NLU_TASKS or task in NLG_TASKS: @@ -282,7 +272,8 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): elif isinstance(X, DataFrame): X = X.copy() n = X.shape[0] - cat_columns, num_columns, datetime_columns = [], [], [] + # NOTE: add str_columns here + str_columns, cat_columns, num_columns, datetime_columns = [], [], [], [] drop = False if task in TS_FORECAST: X = X.rename(columns={X.columns[0]: TS_TIMESTAMP_COL}) @@ -292,13 +283,17 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): for column in X.columns: # sklearn\utils\validation.py needs int/float values if X[column].dtype.name in ("object", "category"): - if ( - X[column].nunique() == 1 - or X[column].nunique(dropna=True) - == n - X[column].isnull().sum() - ): + if X[column].nunique() == 1: X.drop(columns=column, inplace=True) drop = True + elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.1): + # NOTE: here a threshold is applied for distinguishing str vs. cat + # if no threshold wanted = requires every non-nan str entry to be different + # delete the line above and uncomment below + # elif X[column].nunique(dropna=True) == n - X[column].isnull().sum(): + # NOTE: here detects str fields, fillna with "" + X[column] = X[column].fillna("") + str_columns.append(column) elif X[column].dtype.name == "category": current_categories = X[column].cat.categories if "__NAN__" not in current_categories: @@ -340,7 +335,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): del tmp_dt X[column] = X[column].fillna(np.nan) num_columns.append(column) - X = X[cat_columns + num_columns] + X = X[str_columns + cat_columns + num_columns] if task in TS_FORECAST: X.insert(0, TS_TIMESTAMP_COL, ds_col) if cat_columns: @@ -369,7 +364,8 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): ] ) X[num_columns] = self.transformer.fit_transform(X_num) - self._cat_columns, self._num_columns, self._datetime_columns = ( + self.str_columns, self._cat_columns, self._num_columns, self._datetime_columns = ( + str_columns, cat_columns, num_columns, datetime_columns, @@ -410,7 +406,8 @@ def transform(self, X: Union[DataFrame, np.array]): if len(self._str_columns) > 0: X[self._str_columns] = X[self._str_columns].astype("string") elif isinstance(X, DataFrame): - cat_columns, num_columns, datetime_columns = ( + str_columns, cat_columns, num_columns, datetime_columns = ( + self.str_columns, self._cat_columns, self._num_columns, self._datetime_columns, @@ -436,7 +433,7 @@ def transform(self, X: Union[DataFrame, np.array]): X[new_col_name] = new_col_value X[column] = X[column].map(datetime.toordinal) del tmp_dt - X = X[cat_columns + num_columns].copy() + X = X[str_columns + cat_columns + num_columns].copy() if self._task in TS_FORECAST: X.insert(0, TS_TIMESTAMP_COL, ds_col) for column in cat_columns: diff --git a/flaml/ml.py b/flaml/ml.py index 55256d3de2..34ef0bdabc 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -37,7 +37,7 @@ ARIMA, SARIMAX, TransformersEstimator, - AGTextPredictorEstimator, + MultiModalEstimator, ) from .data import CLASSIFICATION, group_counts, TS_FORECAST, TS_VALUE_COL import logging @@ -122,8 +122,8 @@ def get_estimator_class(task, estimator_name): estimator_class = SARIMAX elif estimator_name == "transformer": estimator_class = TransformersEstimator - elif estimator_name == "agtextpredictor": - estimator_class = AGTextPredictorEstimator + elif estimator_name == "multimodal": + estimator_class = MultiModalEstimator else: raise ValueError( estimator_name + " is not a built-in learner. " diff --git a/flaml/model.py b/flaml/model.py index 263587c468..a980dceb41 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -1965,16 +1965,10 @@ class XGBoostLimitDepth_TS(TS_SKLearn): base_class = XGBoostLimitDepthEstimator -class AGTextPredictorEstimator(BaseEstimator): +class MultiModalEstimator(BaseEstimator): """ The class for tuning AutoGluon TextPredictor """ - def __init__(self, task="binary", **params,): - from autogluon.text import TextPredictor - - super().__init__(task, **params) - self.estimator_class = TextPredictor - @classmethod def search_space(cls, **params): """ @@ -1982,6 +1976,7 @@ def search_space(cls, **params): reference: https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values """ + # TODO: expand the search space search_space_dict = { "model.network.agg_net.mid_units": { "domain": tune.choice(list(range(32, 129))), @@ -1993,138 +1988,91 @@ def search_space(cls, **params): }, "optimization.wd": { "domain": tune.choice([1E-4, 1E-3, 1E-2]), - "init_value":1E-4, + "init_value": 1E-4, }, "optimization.warmup_portion": { "domain": tune.choice([0.1, 0.2]), - "init_value":0.1, + "init_value": 0.1, + }, + "optimization.layerwise_lr_decay": { + "domain": tune.choice([0.8, 0.9]), + "init_value": 0.8, + }, + "optimization.nbest": { + "domain": tune.choice([2, 3, 4,]), + "init_value": 3, + }, + "optimization.num_train_epochs": { + "domain": tune.choice([5, 10, 15,]), + "init_value": 10, + }, + "optimization.per_device_batch_size": { + "domain": tune.choice([2, 4, 8,]), + "init_value": 10, + }, + "optimization.batch_size": { + "domain": tune.choice([32, 64, 128,]), + "init_value": 128, }, } return search_space_dict - def _init_fix_args(self, automl_fit_kwargs: dict=None): - """ - Save the customed fix args here - this includes: - "output_dir", - "text_backbone": "electra_base" - "multimodal_fusion_strategy":"fuse_late", - """ - fix_args = {} - FIX_ARGS_LIST = ["output_dir", "dataset_name", "label_column", "per_device_batch_size", "backend", - "text_backbone", "multimodal_fusion_strategy", "num_train_epochs", "batch_size",] - for key, value in automl_fit_kwargs["custom_fix_args"].items(): - assert ( - key in FIX_ARGS_LIST - ), "The specified key {} is not in the argument list: output_dir, backend, label_column, dataset_name, text_backbone,\ - multimodal_fusion_strategy, num_train_epochs, batch_size, per_device_batch_size".format(key) + def _init_ag_args(self, automl_fit_kwargs: dict = None): + from .nlp.utils import AGArgs - fix_args[key] = value - - self.fix_args = fix_args - - def _init_hp_config(self, text_backbone: str, multimodal_fusion_strategy: str): - """" - Ref: - https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values - """ - if self.fix_args.get("backend", "pytorch") == "mxnet": - from autogluon.text.text_prediction.legacy_presets import ag_text_presets - - base_key = f'{text_backbone}_{multimodal_fusion_strategy}' - cfg = ag_text_presets.create(base_key) - # NOTE: if the search_space() is modified, add new items or delete here too. - TUNABLE_HP = set(["model.network.agg_net.mid_units", - "optimization.batch_size", - "optimization.layerwise_lr_decay", - "optimization.lr", - "optimization.nbest", - "optimization.num_train_epochs", - "optimization.per_device_batch_size", - "optimization.wd", - "optimization.warmup_portion", - ]) - search_space = cfg["models"]["MultimodalTextModel"]["search_space"] - search_space["optimization.per_device_batch_size"] = self.fix_args.get("per_device_batch_size", 4) - search_space["optimization.num_train_epochs"] = self.fix_args.get("num_train_epochs", 10) - search_space["optimization.batch_size"] = self.fix_args.get("batch_size", 128) - for key, value in self.params.items(): - if key in TUNABLE_HP: - # NOTE: FLAML uses np.float64 but AG uses float, need to transform - if isinstance(value, np.float64): - search_space[key] = value.item() - else: - search_space[key] = value - return cfg - - else: - raise ValueError("the pytorch automm model is not supported. ") - # from autogluon.text.text_prediction.presets import get_text_preset - - # cfg, overrides = get_text_preset("default") # get preset for text+num+cat+fusion - # # TODO: set the search space for the auto_mm in AG 0.4.0 - # cfg.hf_text.checkpoint_name = self.fix_args["hf_text.checkpoint_name"] - # # get search configs from self.params and set here - # TUNABLE_HP = [] - # for key, value in self.params.items(): - # if key in TUNABLE_HP: - # # NOTE: FLAML uses np.float64 but AG uses float, might need to transform - # if isinstance(value, np.float64): - # search_space[key] = value.item() - # else: - # search_space[key] = value - return cfg - + ag_args = AGArgs() + for key, val in automl_fit_kwargs["ag_args"].items(): + assert ( + key in ag_args.__dict__ + ), "The specified key {} is not in the argument list of flaml.nlp.utils::AGArgs".format( + key + ) + setattr(ag_args, key, val) + self.ag_args = ag_args def _set_seed(self, seed): import random import mxnet as mx - import torch as th - th.manual_seed(seed) + # import torch as th + # th.manual_seed(seed) mx.random.seed(seed) np.random.seed(seed) random.seed(seed) def fit(self, X_train=None, y_train=None, budget=None, **kwargs): + from autogluon.text import TextPredictor + self._kwargs = kwargs - self._init_fix_args(kwargs) - # the seed set in the bash script for ag experiment is 123 - seed = self.params.get("seed", 123) + self._init_ag_args(kwargs) + seed = self._kwargs.get("seed", 123) self._set_seed(seed) - # get backbone and fusion strategy - text_backbone = self.fix_args["text_backbone"] - multimodal_fusion_strategy = self.fix_args["multimodal_fusion_strategy"] - - # get & set the save dir, get the dataset info - save_dir = self.fix_args["output_dir"] - label_column = self.fix_args["label_column"] - dataset_name = self.fix_args["dataset_name"] - ag_model_save_dir = os.path.join(save_dir, f"{dataset_name}_ag_text_multimodal_{text_backbone}\ - _{multimodal_fusion_strategy}_no_ensemble") - - # set the hyperparameters - self.hyperparameters = self._init_hp_config(text_backbone, multimodal_fusion_strategy) - PROBLEM_TYPE_MAPPING = {"mm_binary": "binary", "mm_multi": "multiclass", "mm_regression": "regression"} - TASK_METRIC_MAPPING = {"mm_multi": "acc", "mm_binary": "roc_auc", "mm_regression": "r2"} + assert (self.ag_args.backend == "mxnet"), "the pytorch automm model is not supported. " + # get & set the hyperparameters, update with self.params + hyperparameters = self.ag_args.get_presets() + search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"] + for key, value in self.params.items(): + # NOTE: FLAML uses np.float64 but AG uses float, need to transform + if isinstance(value, np.float64): + search_space[key] = value.item() + else: + search_space[key] = value + PROBLEM_TYPE_MAPPING = {"binary": "binary", + "multi": "multiclass", + "regression": "regression" + } # train the model start_time = time.time() + self._model = TextPredictor(path=self.ag_args.output_dir, + label="label", + problem_type=PROBLEM_TYPE_MAPPING[self._task], + eval_metric=kwargs["metric"], + backend=self.ag_args.backend) - self._model = self.estimator_class(path=ag_model_save_dir, - label=label_column, - problem_type=PROBLEM_TYPE_MAPPING[self._task], - eval_metric=TASK_METRIC_MAPPING[self._task], - backend=self.fix_args.get("backend", "pytorch")) - - # train_data = self._kwargs["train_data"] - import pandas as pd - train_data = pd.concat([X_train, y_train], axis=1) - tuning_data = pd.concat([X_train, y_train], axis=1) - + train_data = TransformersEstimator._join(X_train, y_train) self._model.fit(train_data=train_data, - tuning_data=kwargs.get("tuning_data", None), - hyperparameters=self.hyperparameters, + hyperparameters=hyperparameters, time_limit=budget, seed=seed) @@ -2135,16 +2083,12 @@ def predict(self, X): output = self._model.predict(X, as_pandas=False) return output - def predict_proba(self, X, as_multiclass=True): + def predict_proba(self, X): # only works for classification tasks assert ( self._task in CLASSIFICATION ), "predict_proba() only for classification tasks." - output = self._model.predict_proba(X, as_pandas=False) - if not as_multiclass: - if self._task == "mm_binary": - output = output[:, 1] return output diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 22bd25faaa..98a35ee49a 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -573,3 +573,88 @@ def load_args(): ) console_args, unknown = arg_parser.parse_known_args() return console_args + + +@dataclass +class AGArgs: + """ + The Autogluon configurations + Args: + output_dir (str): data root directory for outputing the log and intermediate data, model. + backend (str, optional, defaults to "mxnet"): currently only support to mxnet. + text_backbone (str, optional, defaults to "electra_base"): the text backbone model. + multimodal_fusion_strategy (str, optional, defaults to "fuse_late"): the fuse strategy. + """ + from autogluon.text.text_prediction.legacy_presets import ag_text_presets + + output_dir: str = field( + default="data/mm/output/", metadata={"help": "data dir", "required": True} + ) + backend: str = field(default="mxnet", metadata={"help": "the backend of the multimodal model"}) + text_backbone: str = field(default="electra_base", metadata={"help": "text backbone model"}) + multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"}) + # TODO: determine whether to tune these HPs + # per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"}) + # num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"}) + # batch_size: int = field(default=128, metadata={"help": "batch size"}) + + + def get_presets(self): + """ + Get the preset using the AGArgs. + {'models': {'MultimodalTextModel': {'backend': 'gluonnlp_v0', + 'search_space': {'model.backbone.name': 'google_electra_small', + 'model.network.agg_net.agg_type': 'concat', + 'model.network.agg_net.mid_units': 128, # [in HPO example] + 'model.network.aggregate_categorical': True, + 'model.use_avg_nbest': True, + 'optimization.batch_size': 128, + 'optimization.layerwise_lr_decay': 0.8, + 'optimization.lr': Categorical[0.0001], + 'optimization.nbest': 3, + 'optimization.num_train_epochs': 10, + 'optimization.per_device_batch_size': 8, + 'optimization.wd': 0.0001, + 'optimization.warmup_portion': 0.1, # [in HPO example] + 'preprocessing.categorical.convert_to_text': False, + 'preprocessing.numerical.convert_to_text': False}}}, + 'tune_kwargs': {'num_trials': 1, + 'scheduler_options': None, + 'search_options': None, + 'search_strategy': 'local', + 'searcher': 'random'}} + Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html + Return: + hyperparameters: a Dict of the hyperparameter settings. + """ + from autogluon.text.text_prediction.legacy_presets import ag_text_presets + + base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}' + hyperparameters = ag_text_presets.create(base_key) + search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"] + # TODO: set anything that would like to be set via ag_args here + + return hyperparameters + + + @staticmethod + def load_args(): + from dataclasses import fields + + arg_parser = argparse.ArgumentParser() + for each_field in fields(AGArgs): + print(each_field) + arg_parser.add_argument( + "--" + each_field.name, + type=each_field.type, + help=each_field.metadata["help"], + required=each_field.metadata["required"] + if "required" in each_field.metadata + else False, + choices=each_field.metadata["choices"] + if "choices" in each_field.metadata + else None, + default=each_field.default, + ) + console_args, unknown = arg_parser.parse_known_args() + return console_args diff --git a/test/nlp/test_agtextpredictor.py b/test/nlp/test_agtextpredictor.py index 738610a882..c8fe0bc66a 100644 --- a/test/nlp/test_agtextpredictor.py +++ b/test/nlp/test_agtextpredictor.py @@ -25,7 +25,7 @@ def default_holdout_frac(num_train_rows, hyperparameter_tune=False): return holdout_frac -def test_ag_mx_textpredictor(): +def test_multimodalestimator(): if sys.version < "3.7": # do not test on python3.6 return @@ -103,22 +103,15 @@ def test_ag_mx_textpredictor(): "gpu_per_trial": 0, "max_iter": 2, "time_budget": 50, - "task": "mm_multi", + "task": "classification", "metric": "accuracy", } - - automl_settings["custom_fix_args"] = { + # TODO: modify and double check + automl_settings["ag_args"] = { "output_dir": "test/ag/output/", - # "backend": "pytorch", "backend": "mxnet", - # "hf_text.checkpoint_name": "google/electra-base-discriminator", "text_backbone": "electra_base", "multimodal_fusion_strategy": "fuse_late", - "dataset_name": "test_ag", - "label_column": "label", - "per_device_batch_size": 4, - "num_train_epochs": 2, - "batch_size": 4, } automl.fit( @@ -128,7 +121,7 @@ def test_ag_mx_textpredictor(): y_val=valid_dataset["label"], eval_method="holdout", auto_augment=False, - estimator_list=["agtextpredictor"], + estimator_list=["multimodal"], **automl_settings ) @@ -136,4 +129,8 @@ def test_ag_mx_textpredictor(): score = automl.model.estimator.evaluate(test_dataset) print(f"Inference on test set complete, {metric}: {score}") del automl - gc.collect() \ No newline at end of file + gc.collect() + + +if __name__ == "__main__": + test_multimodalestimator() \ No newline at end of file From c40af7d3db28f7862d945800b220c1dd347805ad Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 24 Mar 2022 17:35:41 -0400 Subject: [PATCH 21/50] Rename to MultimodalEstimator, pr fix --- flaml/ml.py | 3 +++ flaml/model.py | 8 ++++---- flaml/nlp/utils.py | 6 ++---- ...est_agtextpredictor.py => test_multimodalestimator.py} | 6 +----- 4 files changed, 10 insertions(+), 13 deletions(-) rename test/nlp/{test_agtextpredictor.py => test_multimodalestimator.py} (98%) diff --git a/flaml/ml.py b/flaml/ml.py index 246dd46b0e..f8ccb2f915 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -568,6 +568,9 @@ def compute_estimator( if isinstance(estimator, TransformersEstimator): fit_kwargs["metric"] = eval_metric + + elif isinstance(estimator, MultiModalEstimator): + fit_kwargs["metric"] = eval_metric if "holdout" == eval_method: val_loss, metric_for_logging, train_time, pred_time = get_val_loss( diff --git a/flaml/model.py b/flaml/model.py index c33a9baf18..65e041dc74 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2076,6 +2076,7 @@ def _init_ag_args(self, automl_fit_kwargs: dict = None): def _set_seed(self, seed): import random import mxnet as mx + # NOTE: if support pytorch backend, uncomment below # import torch as th # th.manual_seed(seed) mx.random.seed(seed) @@ -2096,23 +2097,22 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"] for key, value in self.params.items(): # NOTE: FLAML uses np.float64 but AG uses float, need to transform - if isinstance(value, np.float64): + if key == "n_jobs": + continue + elif isinstance(value, np.float64): search_space[key] = value.item() else: search_space[key] = value - PROBLEM_TYPE_MAPPING = {"binary": "binary", "multi": "multiclass", "regression": "regression" } - # train the model start_time = time.time() self._model = TextPredictor(path=self.ag_args.output_dir, label="label", problem_type=PROBLEM_TYPE_MAPPING[self._task], eval_metric=kwargs["metric"], backend=self.ag_args.backend) - train_data = TransformersEstimator._join(X_train, y_train) self._model.fit(train_data=train_data, hyperparameters=hyperparameters, diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 9e8fc4216d..ce18071eef 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -681,15 +681,13 @@ def get_presets(self): 'searcher': 'random'}} Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html Return: - hyperparameters: a Dict of the hyperparameter settings. + hyperparameters: a Dict of the preset hyperparameter settings. """ from autogluon.text.text_prediction.legacy_presets import ag_text_presets base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}' hyperparameters = ag_text_presets.create(base_key) - search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"] - # TODO: set anything that would like to be set via ag_args here - + # NOTE: set anything else that would like to be set via ag_args here return hyperparameters diff --git a/test/nlp/test_agtextpredictor.py b/test/nlp/test_multimodalestimator.py similarity index 98% rename from test/nlp/test_agtextpredictor.py rename to test/nlp/test_multimodalestimator.py index c8fe0bc66a..0ba8e5ec10 100644 --- a/test/nlp/test_agtextpredictor.py +++ b/test/nlp/test_multimodalestimator.py @@ -106,7 +106,7 @@ def test_multimodalestimator(): "task": "classification", "metric": "accuracy", } - # TODO: modify and double check + automl_settings["ag_args"] = { "output_dir": "test/ag/output/", "backend": "mxnet", @@ -130,7 +130,3 @@ def test_multimodalestimator(): print(f"Inference on test set complete, {metric}: {score}") del automl gc.collect() - - -if __name__ == "__main__": - test_multimodalestimator() \ No newline at end of file From d0b3b11b7a9bef80751b3f4591a689abf380c222 Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 24 Mar 2022 17:46:14 -0400 Subject: [PATCH 22/50] remove comment --- flaml/automl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flaml/automl.py b/flaml/automl.py index 63da070c91..1571ece494 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -933,7 +933,6 @@ def _validate_data( ) if issparse(X_train_all): - # leave the preprocessing to the mm_estimator self._transformer = self._label_transformer = False self._X_train_all, self._y_train_all = X, y else: From 30e9f60cfa487fb2aac2de54b69d175c65c4b858 Mon Sep 17 00:00:00 2001 From: Qiaochu Song Date: Fri, 25 Mar 2022 11:06:36 -0400 Subject: [PATCH 23/50] Update data.py bug fix --- flaml/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaml/data.py b/flaml/data.py index 30968b6012..eb27e5bf74 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -286,7 +286,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): if X[column].nunique() == 1: X.drop(columns=column, inplace=True) drop = True - elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.1): + elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.9): # NOTE: here a threshold is applied for distinguishing str vs. cat # if no threshold wanted = requires every non-nan str entry to be different # delete the line above and uncomment below From d15dd60590a68a2e75cfa4adef8c7ac0be0be5dd Mon Sep 17 00:00:00 2001 From: Varia Date: Fri, 25 Mar 2022 11:10:40 -0400 Subject: [PATCH 24/50] fix bug --- flaml/data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flaml/data.py b/flaml/data.py index 30968b6012..144a94916b 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -286,12 +286,12 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): if X[column].nunique() == 1: X.drop(columns=column, inplace=True) drop = True - elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.1): + elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.9): # NOTE: here a threshold is applied for distinguishing str vs. cat - # if no threshold wanted = requires every non-nan str entry to be different + # if no threshold wanted => requires every non-nan str entry to be different # delete the line above and uncomment below # elif X[column].nunique(dropna=True) == n - X[column].isnull().sum(): - # NOTE: here detects str fields, fillna with "" + # NOTE: here detects str fields and do fillna with "" X[column] = X[column].fillna("") str_columns.append(column) elif X[column].dtype.name == "category": From 301eb16e0546b7d82bf755b70c8ec79869274537 Mon Sep 17 00:00:00 2001 From: Varia Date: Fri, 25 Mar 2022 12:03:23 -0400 Subject: [PATCH 25/50] remove useless import --- flaml/nlp/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index ce18071eef..65277a7082 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -641,8 +641,6 @@ class AGArgs: text_backbone (str, optional, defaults to "electra_base"): the text backbone model. multimodal_fusion_strategy (str, optional, defaults to "fuse_late"): the fuse strategy. """ - from autogluon.text.text_prediction.legacy_presets import ag_text_presets - output_dir: str = field( default="data/mm/output/", metadata={"help": "data dir", "required": True} ) From c59a3b27bf04eb11cd7e52c24f111a24d0a08109 Mon Sep 17 00:00:00 2001 From: Varia Date: Fri, 25 Mar 2022 12:03:23 -0400 Subject: [PATCH 26/50] remove useless import --- flaml/nlp/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index ce18071eef..65277a7082 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -641,8 +641,6 @@ class AGArgs: text_backbone (str, optional, defaults to "electra_base"): the text backbone model. multimodal_fusion_strategy (str, optional, defaults to "fuse_late"): the fuse strategy. """ - from autogluon.text.text_prediction.legacy_presets import ag_text_presets - output_dir: str = field( default="data/mm/output/", metadata={"help": "data dir", "required": True} ) From ea515d2ffbbede41ac6768a962cace2877ec364e Mon Sep 17 00:00:00 2001 From: Varia Date: Mon, 28 Mar 2022 15:25:54 -0400 Subject: [PATCH 27/50] remove task mapping for AG --- flaml/model.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/flaml/model.py b/flaml/model.py index 3044585a94..d93a6fc6ee 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2194,14 +2194,10 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): search_space[key] = value.item() else: search_space[key] = value - PROBLEM_TYPE_MAPPING = {"binary": "binary", - "multi": "multiclass", - "regression": "regression" - } start_time = time.time() self._model = TextPredictor(path=self.ag_args.output_dir, label="label", - problem_type=PROBLEM_TYPE_MAPPING[self._task], + problem_type=self._task, eval_metric=kwargs["metric"], backend=self.ag_args.backend) train_data = TransformersEstimator._join(X_train, y_train) From 6cc2f9ef89779f451634aa4a5a9916f6f86bd41a Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 13 Apr 2022 19:48:22 -0400 Subject: [PATCH 28/50] use 0.5 threshold for text/cat inference --- flaml/automl.py | 8 +++ flaml/data.py | 9 ++- flaml/model.py | 102 ++++++++++++--------------- flaml/nlp/utils.py | 47 ++++-------- test/nlp/test_multimodalestimator.py | 40 ++++------- 5 files changed, 84 insertions(+), 122 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index bf3d3ce14f..53d52541d1 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -2280,6 +2280,14 @@ def is_to_reverse_metric(metric, task): ) logger.info("List of ML learners in AutoML Run: {}".format(estimator_list)) self.estimator_list = estimator_list + if self._transformer.text_columns: + if len(self._transformer.text_columns) == len(X_train.columns): + assert _is_nlp_task(self._state.task) == True + else: + self.estimator_list = ["multimodal"] + logger.warning("columns type of {} are set to text".format(self._transformer.text_columns)) + logger.info("numerical columns {}".format(self._transformer._num_columns)) + logger.info("categorical columns {}".format(self._transformer._cat_columns)) self._state.time_budget = time_budget if time_budget > 0 else 1e10 self._active_estimators = estimator_list.copy() self._ensemble = ensemble diff --git a/flaml/data.py b/flaml/data.py index 26cc8ae0a1..4ab80fe600 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -245,6 +245,9 @@ def concat(X1, X2): class DataTransformer: """Transform input training data.""" + @property + def text_columns(self): + return self._str_columns def fit_transform(self, X: Union[DataFrame, np.array], y, task): """Fit transformer and process the input training data according to the task type. @@ -286,7 +289,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): if X[column].nunique() == 1: X.drop(columns=column, inplace=True) drop = True - elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.9): + elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.5): # NOTE: here a threshold is applied for distinguishing str vs. cat # if no threshold wanted => requires every non-nan str entry to be different # delete the line above and uncomment below @@ -364,7 +367,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): ] ) X[num_columns] = self.transformer.fit_transform(X_num) - self.str_columns, self._cat_columns, self._num_columns, self._datetime_columns = ( + self._str_columns, self._cat_columns, self._num_columns, self._datetime_columns = ( str_columns, cat_columns, num_columns, @@ -407,7 +410,7 @@ def transform(self, X: Union[DataFrame, np.array]): X[self._str_columns] = X[self._str_columns].astype("string") elif isinstance(X, DataFrame): str_columns, cat_columns, num_columns, datetime_columns = ( - self.str_columns, + self._str_columns, self._cat_columns, self._num_columns, self._datetime_columns, diff --git a/flaml/model.py b/flaml/model.py index d93a6fc6ee..e4c01892f9 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -132,6 +132,13 @@ def estimator(self): def _preprocess(self, X): return X + @staticmethod + def _join(X_train, y_train): + y_train = DataFrame(y_train, index=X_train.index) + y_train.columns = ["label"] + train_df = X_train.join(y_train) + return train_df + def _fit(self, X_train, y_train, **kwargs): current_time = time.time() @@ -361,13 +368,6 @@ def __init__(self, task="seq-classification", **config): from transformers import TrainingArguments self._TrainingArguments = TrainingArguments - @staticmethod - def _join(X_train, y_train): - y_train = DataFrame(y_train, index=X_train.index) - y_train.columns = ["label"] - train_df = X_train.join(y_train) - return train_df - @classmethod def search_space(cls, data_size, task, **params): search_space_dict = { @@ -593,7 +593,7 @@ def on_epoch_end(self, args, state, control, **callback_kwargs): ) train_dataset = Dataset.from_pandas( - TransformersEstimator._join(self._X_train, self._y_train) + BaseEstimator._join(self._X_train, self._y_train) ) if X_val is not None: @@ -603,7 +603,7 @@ def on_epoch_end(self, args, state, control, **callback_kwargs): else: self._X_val, self._y_val = self._preprocess(X=X_val, y=y_val, **kwargs) eval_dataset = Dataset.from_pandas( - TransformersEstimator._join(self._X_val, self._y_val) + BaseEstimator._join(self._X_val, self._y_val) ) else: eval_dataset = None @@ -831,7 +831,7 @@ def score(self, X_val: DataFrame, y_val: Series, **kwargs): self._X_val, self._y_val = self._preprocess(X=X_val, y=y_val) eval_dataset = Dataset.from_pandas( - TransformersEstimator._join(self._X_val, self._y_val) + BaseEstimator._join(self._X_val, self._y_val) ) new_trainer, training_args = self._init_model_for_predict() @@ -2103,6 +2103,12 @@ class MultiModalEstimator(BaseEstimator): """ The class for tuning AutoGluon TextPredictor """ + def __init__(self, task="binary", **config): + super().__init__(task, **config) + import uuid + + self.trial_id = str(uuid.uuid1().hex)[:8] + @classmethod def search_space(cls, **params): """ @@ -2128,26 +2134,6 @@ def search_space(cls, **params): "domain": tune.choice([0.1, 0.2]), "init_value": 0.1, }, - "optimization.layerwise_lr_decay": { - "domain": tune.choice([0.8, 0.9]), - "init_value": 0.8, - }, - "optimization.nbest": { - "domain": tune.choice([2, 3, 4,]), - "init_value": 3, - }, - "optimization.num_train_epochs": { - "domain": tune.choice([5, 10, 15,]), - "init_value": 10, - }, - "optimization.per_device_batch_size": { - "domain": tune.choice([2, 4, 8,]), - "init_value": 10, - }, - "optimization.batch_size": { - "domain": tune.choice([32, 64, 128,]), - "init_value": 128, - }, } return search_space_dict @@ -2164,63 +2150,65 @@ def _init_ag_args(self, automl_fit_kwargs: dict = None): setattr(ag_args, key, val) self.ag_args = ag_args - def _set_seed(self, seed): - import random - import mxnet as mx - # NOTE: if support pytorch backend, uncomment below - # import torch as th - # th.manual_seed(seed) - mx.random.seed(seed) - np.random.seed(seed) - random.seed(seed) - def fit(self, X_train=None, y_train=None, budget=None, **kwargs): from autogluon.text import TextPredictor self._kwargs = kwargs self._init_ag_args(kwargs) seed = self._kwargs.get("seed", 123) - self._set_seed(seed) assert (self.ag_args.backend == "mxnet"), "the pytorch automm model is not supported. " # get & set the hyperparameters, update with self.params - hyperparameters = self.ag_args.get_presets() + hyperparameters = self.ag_args.hyperparameters search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"] for key, value in self.params.items(): # NOTE: FLAML uses np.float64 but AG uses float, need to transform if key == "n_jobs": continue - elif isinstance(value, np.float64): - search_space[key] = value.item() else: - search_space[key] = value + search_space[key] = value.item() if isinstance(value, np.float64) else value start_time = time.time() - self._model = TextPredictor(path=self.ag_args.output_dir, - label="label", - problem_type=self._task, - eval_metric=kwargs["metric"], - backend=self.ag_args.backend) - train_data = TransformersEstimator._join(X_train, y_train) - self._model.fit(train_data=train_data, - hyperparameters=hyperparameters, - time_limit=budget, - seed=seed) + self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id) + model = TextPredictor(path=self.model_path, + label="label", + problem_type=self._task, + eval_metric=kwargs["metric"], + backend=self.ag_args.backend) + train_data = BaseEstimator._join(X_train, y_train) + model.fit(train_data=train_data, + hyperparameters=hyperparameters, + num_gpus=kwargs.get("gpu_per_trial", None), + time_limit=budget, + seed=seed) training_time = time.time() - start_time return training_time def predict(self, X): - output = self._model.predict(X, as_pandas=False) + from autogluon.text import TextPredictor + + model = TextPredictor.load(path=self.model_path, backend=self.ag_args.backend) + output = model.predict(X, as_pandas=False) return output def predict_proba(self, X): + from autogluon.text import TextPredictor + # only works for classification tasks assert ( self._task in CLASSIFICATION ), "predict_proba() only for classification tasks." - output = self._model.predict_proba(X, as_pandas=False) + model = TextPredictor.load(path=self.model_path, backend=self.ag_args.backend) + output = model.predict_proba(X, as_pandas=False) return output + def score(self, X_val: DataFrame, y_val: Series, **kwargs): + from autogluon.text import TextPredictor + + model = TextPredictor.load(path=self.model_path, backend=self.ag_args.backend) + val_data = BaseEstimator._join(X_val, y_val) + return model.evaluate(val_data) + class suppress_stdout_stderr(object): def __init__(self): diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 0565079b7f..a4cf0779ff 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -650,47 +650,25 @@ class AGArgs: backend: str = field(default="mxnet", metadata={"help": "the backend of the multimodal model"}) text_backbone: str = field(default="electra_base", metadata={"help": "text backbone model"}) multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"}) - # TODO: determine whether to tune these HPs - # per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"}) - # num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"}) - # batch_size: int = field(default=128, metadata={"help": "batch size"}) - + per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"}) + num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"}) + batch_size: int = field(default=128, metadata={"help": "batch size"}) + hyperparameters: dict = field(init=False) - def get_presets(self): + def __post_init__(self): """ - Get the preset using the AGArgs. - {'models': {'MultimodalTextModel': {'backend': 'gluonnlp_v0', - 'search_space': {'model.backbone.name': 'google_electra_small', - 'model.network.agg_net.agg_type': 'concat', - 'model.network.agg_net.mid_units': 128, # [in HPO example] - 'model.network.aggregate_categorical': True, - 'model.use_avg_nbest': True, - 'optimization.batch_size': 128, - 'optimization.layerwise_lr_decay': 0.8, - 'optimization.lr': Categorical[0.0001], - 'optimization.nbest': 3, - 'optimization.num_train_epochs': 10, - 'optimization.per_device_batch_size': 8, - 'optimization.wd': 0.0001, - 'optimization.warmup_portion': 0.1, # [in HPO example] - 'preprocessing.categorical.convert_to_text': False, - 'preprocessing.numerical.convert_to_text': False}}}, - 'tune_kwargs': {'num_trials': 1, - 'scheduler_options': None, - 'search_options': None, - 'search_strategy': 'local', - 'searcher': 'random'}} + Get the preset using the AGArgs. Save as self.hyperparameters. Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html - Return: - hyperparameters: a Dict of the preset hyperparameter settings. """ from autogluon.text.text_prediction.legacy_presets import ag_text_presets base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}' - hyperparameters = ag_text_presets.create(base_key) - # NOTE: set anything else that would like to be set via ag_args here - return hyperparameters - + self.hyperparameters = ag_text_presets.create(base_key) + # NOTE: set batch & epoch + search_space = self.hyperparameters["models"]["MultimodalTextModel"]["search_space"] + search_space["optimization.per_device_batch_size"] = self.per_device_batch_size + search_space["optimization.batch_size"] = self.batch_size + search_space["optimization.num_train_epochs"] = self.num_train_epochs @staticmethod def load_args(): @@ -698,7 +676,6 @@ def load_args(): arg_parser = argparse.ArgumentParser() for each_field in fields(AGArgs): - print(each_field) arg_parser.add_argument( "--" + each_field.name, type=each_field.type, diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 0ba8e5ec10..004cd6b9bc 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -1,11 +1,11 @@ from flaml import AutoML import pandas as pd -import requests import gc import numpy as np import os import sys import platform +import pickle from sklearn.model_selection import train_test_split os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" @@ -70,25 +70,6 @@ def test_multimodalestimator(): } train_dataset = pd.DataFrame(train_data) - test_data = { - "sentence1": [ - "That compared with $ 35.18 million , or 24 cents per share , in the year-ago period .", - "Shares of Genentech , a much larger company with several products on the market , rose more than 2 percent .", - "Legislation making it harder for consumers to erase their debts in bankruptcy court won overwhelming House approval in March .", - "The Nasdaq composite index increased 10.73 , or 0.7 percent , to 1,514.77 .", - ], - "sentence2": [ - "Earnings were affected by a non-recurring $ 8 million tax benefit in the year-ago period .", - "Shares of Xoma fell 16 percent in early trade , while shares of Genentech , a much larger company with several products on the market , were up 2 percent .", - "Legislation making it harder for consumers to erase their debts in bankruptcy court won speedy , House approval in March and was endorsed by the White House .", - "The Nasdaq Composite index , full of technology stocks , was lately up around 18 points .", - ], - "numerical1": [3, 4, 5, 6], - "categorical1": ["b", "a", "a", "b"], - "label": [0, 1, 1, 2], - } - test_dataset = pd.DataFrame(test_data) - # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR holdout_frac = default_holdout_frac(len(train_dataset), False) @@ -102,15 +83,15 @@ def test_multimodalestimator(): automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 50, + "time_budget": 10, "task": "classification", "metric": "accuracy", } automl_settings["ag_args"] = { - "output_dir": "test/ag/output/", + "output_dir": "test/ag_output/", "backend": "mxnet", - "text_backbone": "electra_base", + "text_backbone": "electra_small", "multimodal_fusion_strategy": "fuse_late", } @@ -124,9 +105,14 @@ def test_multimodalestimator(): estimator_list=["multimodal"], **automl_settings ) - - print("Try to run inference on test set") - score = automl.model.estimator.evaluate(test_dataset) - print(f"Inference on test set complete, {metric}: {score}") + automl.pickle("automl.pkl") + with open("automl.pkl", "rb") as f: + automl = pickle.load(f) + print("Try to run inference on validation set") + score = automl.score(valid_dataset[feature_columns], valid_dataset["label"]) + print(f"Inference on validation set complete, {metric}: {score}") del automl gc.collect() + +if __name__ == "__main__": + test_multimodalestimator() \ No newline at end of file From 4cc2b4e783a2b5af3c24c67cf712df76c7ba60d5 Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 14 Apr 2022 11:48:55 -0400 Subject: [PATCH 29/50] add MM_TASKS; no preprocess on X; pass val_data for early stopping --- flaml/automl.py | 16 +++++----- flaml/data.py | 48 +++++++++++++++------------- flaml/ml.py | 2 ++ flaml/model.py | 7 ++-- test/nlp/test_multimodalestimator.py | 26 ++------------- 5 files changed, 44 insertions(+), 55 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index 53d52541d1..d8b4f35a21 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -46,6 +46,7 @@ REGRESSION, _is_nlp_task, NLG_TASKS, + MM_TASKS, ) from . import tune from .training_log import training_log_reader, training_log_writer @@ -1480,6 +1481,10 @@ def _decide_split_type(self, split_type): self._state.task = get_classification_objective( len(np.unique(self._y_train_all)) ) + elif self._state.task == "mm-classification": + self._state.task = "mm-" + get_classification_objective( + len(np.unique(self._y_train_all)) + ) if not isinstance(split_type, str): assert hasattr(split_type, "split") and hasattr( split_type, "get_n_splits" @@ -2192,6 +2197,9 @@ def is_to_reverse_metric(metric, task): estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"] elif _is_nlp_task(self._state.task): estimator_list = ["transformer"] + # NOTE: if multimodal task, use multimodal estimator + elif self._state.task in MM_TASKS: + estimator_list=["multimodal"] else: try: import catboost @@ -2280,14 +2288,6 @@ def is_to_reverse_metric(metric, task): ) logger.info("List of ML learners in AutoML Run: {}".format(estimator_list)) self.estimator_list = estimator_list - if self._transformer.text_columns: - if len(self._transformer.text_columns) == len(X_train.columns): - assert _is_nlp_task(self._state.task) == True - else: - self.estimator_list = ["multimodal"] - logger.warning("columns type of {} are set to text".format(self._transformer.text_columns)) - logger.info("numerical columns {}".format(self._transformer._num_columns)) - logger.info("categorical columns {}".format(self._transformer._cat_columns)) self._state.time_budget = time_budget if time_budget > 0 else 1e10 self._active_estimators = estimator_list.copy() self._ensemble = ensemble diff --git a/flaml/data.py b/flaml/data.py index 4ab80fe600..e036975460 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -23,9 +23,12 @@ SEQCLASSIFICATION, MULTICHOICECLASSIFICATION, TOKENCLASSIFICATION, + "mm-binary", + "mm-multiclass", + "mm-classification", ) SEQREGRESSION = "seq-regression" -REGRESSION = ("regression", SEQREGRESSION) +REGRESSION = ("regression", SEQREGRESSION, "mm-regression") TS_FORECASTREGRESSION = ( "forecast", "ts_forecast", @@ -46,6 +49,11 @@ MULTICHOICECLASSIFICATION, TOKENCLASSIFICATION, ) +MM_TASKS = ( + "mm-classification", + "mm-regression", + "mm-binary", + "mm-multiclass",) def _is_nlp_task(task): @@ -245,10 +253,6 @@ def concat(X1, X2): class DataTransformer: """Transform input training data.""" - @property - def text_columns(self): - return self._str_columns - def fit_transform(self, X: Union[DataFrame, np.array], y, task): """Fit transformer and process the input training data according to the task type. @@ -272,11 +276,14 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): if len(str_columns) > 0: X[str_columns] = X[str_columns].astype("string") self._str_columns = str_columns + # NOTE: if multimodal task, no preprocessing on X + elif task in MM_TASKS: + for column in X.columns: + X[column].astype("object") elif isinstance(X, DataFrame): X = X.copy() n = X.shape[0] - # NOTE: add str_columns here - str_columns, cat_columns, num_columns, datetime_columns = [], [], [], [] + cat_columns, num_columns, datetime_columns = [], [], [] drop = False if task in TS_FORECAST: X = X.rename(columns={X.columns[0]: TS_TIMESTAMP_COL}) @@ -286,17 +293,13 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): for column in X.columns: # sklearn\utils\validation.py needs int/float values if X[column].dtype.name in ("object", "category"): - if X[column].nunique() == 1: + if ( + X[column].nunique() == 1 + or X[column].nunique(dropna=True) + == n - X[column].isnull().sum() + ): X.drop(columns=column, inplace=True) drop = True - elif X[column].nunique(dropna=True) >= int((n - X[column].isnull().sum()) * 0.5): - # NOTE: here a threshold is applied for distinguishing str vs. cat - # if no threshold wanted => requires every non-nan str entry to be different - # delete the line above and uncomment below - # elif X[column].nunique(dropna=True) == n - X[column].isnull().sum(): - # NOTE: here detects str fields and do fillna with "" - X[column] = X[column].fillna("") - str_columns.append(column) elif X[column].dtype.name == "category": current_categories = X[column].cat.categories if "__NAN__" not in current_categories: @@ -338,7 +341,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): del tmp_dt X[column] = X[column].fillna(np.nan) num_columns.append(column) - X = X[str_columns + cat_columns + num_columns] + X = X[cat_columns + num_columns] if task in TS_FORECAST: X.insert(0, TS_TIMESTAMP_COL, ds_col) if cat_columns: @@ -367,8 +370,7 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): ] ) X[num_columns] = self.transformer.fit_transform(X_num) - self._str_columns, self._cat_columns, self._num_columns, self._datetime_columns = ( - str_columns, + self._cat_columns, self._num_columns, self._datetime_columns = ( cat_columns, num_columns, datetime_columns, @@ -408,9 +410,11 @@ def transform(self, X: Union[DataFrame, np.array]): # ids (input ids, token type id, attention mask, etc.) if len(self._str_columns) > 0: X[self._str_columns] = X[self._str_columns].astype("string") + elif self._task in MM_TASKS: + for column in X.columns: + X[column].astype("category") elif isinstance(X, DataFrame): - str_columns, cat_columns, num_columns, datetime_columns = ( - self._str_columns, + cat_columns, num_columns, datetime_columns = ( self._cat_columns, self._num_columns, self._datetime_columns, @@ -436,7 +440,7 @@ def transform(self, X: Union[DataFrame, np.array]): X[new_col_name] = new_col_value X[column] = X[column].map(datetime.toordinal) del tmp_dt - X = X[str_columns + cat_columns + num_columns].copy() + X = X[cat_columns + num_columns].copy() if self._task in TS_FORECAST: X.insert(0, TS_TIMESTAMP_COL, ds_col) for column in cat_columns: diff --git a/flaml/ml.py b/flaml/ml.py index 586f06f2d0..436b41caf2 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -580,6 +580,8 @@ def compute_estimator( elif isinstance(estimator, MultiModalEstimator): fit_kwargs["metric"] = eval_metric + fit_kwargs["X_val"] = X_val + fit_kwargs["y_val"] = y_val if "holdout" == eval_method: val_loss, metric_for_logging, train_time, pred_time = get_val_loss( diff --git a/flaml/model.py b/flaml/model.py index e4c01892f9..a0a834092c 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -31,6 +31,7 @@ SUMMARIZATION, NLG_TASKS, MULTICHOICECLASSIFICATION, + MM_TASKS ) try: @@ -2116,7 +2117,6 @@ def search_space(cls, **params): reference: https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values """ - # TODO: expand the search space search_space_dict = { "model.network.agg_net.mid_units": { "domain": tune.choice(list(range(32, 129))), @@ -2169,13 +2169,16 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): search_space[key] = value.item() if isinstance(value, np.float64) else value start_time = time.time() self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id) + assert self._task in MM_TASKS, f"The task is not multimodal, but {self._task}. " model = TextPredictor(path=self.model_path, label="label", - problem_type=self._task, + problem_type=self._task[3:], eval_metric=kwargs["metric"], backend=self.ag_args.backend) train_data = BaseEstimator._join(X_train, y_train) + tuning_data = BaseEstimator._join(kwargs.get("X_val"), kwargs.get("y_val")) model.fit(train_data=train_data, + tuning_data=tuning_data, hyperparameters=hyperparameters, num_gpus=kwargs.get("gpu_per_trial", None), time_limit=budget, diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 004cd6b9bc..dfc748d992 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -9,21 +9,6 @@ from sklearn.model_selection import train_test_split os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" -def default_holdout_frac(num_train_rows, hyperparameter_tune=False): - """ - Returns default holdout_frac used in fit(). - Between row count 5,000 and 25,000 keep 0.1 holdout_frac, as we want to grow validation set to a stable 2500 examples. - Ref: https://github.com/awslabs/autogluon/blob/master/core/src/autogluon/core/utils/utils.py#L243 - """ - if num_train_rows < 5000: - holdout_frac = max(0.1, min(0.2, 500.0 / num_train_rows)) - else: - holdout_frac = max(0.01, min(0.1, 2500.0 / num_train_rows)) - - if hyperparameter_tune: - holdout_frac = min(0.2, holdout_frac * 2) # to allocate more validation data for HPO to avoid overfitting - - return holdout_frac def test_multimodalestimator(): if sys.version < "3.7": @@ -69,12 +54,8 @@ def test_multimodalestimator(): "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2, 0, 1], } train_dataset = pd.DataFrame(train_data) - - # FORCE THE SAME TRAIN-VALID SPLIT IN & OUT THE PREDICTOR - holdout_frac = default_holdout_frac(len(train_dataset), False) - - _, valid_dataset = train_test_split(train_dataset, - test_size=holdout_frac, + train_dataset, valid_dataset = train_test_split(train_dataset, + test_size=0.2, random_state=np.random.RandomState(seed)) feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] @@ -84,7 +65,7 @@ def test_multimodalestimator(): "gpu_per_trial": 0, "max_iter": 2, "time_budget": 10, - "task": "classification", + "task": "mm-classification", "metric": "accuracy", } @@ -102,7 +83,6 @@ def test_multimodalestimator(): y_val=valid_dataset["label"], eval_method="holdout", auto_augment=False, - estimator_list=["multimodal"], **automl_settings ) automl.pickle("automl.pkl") From 4fa136d8ab300142caa73093399b4e37722bce5c Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 14 Apr 2022 14:14:44 -0400 Subject: [PATCH 30/50] adjust testing data and raise budget --- test/nlp/test_multimodalestimator.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index dfc748d992..9175669e0e 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -32,8 +32,6 @@ def test_multimodalestimator(): "The DVD-CCA then appealed to the state Supreme Court .", "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", - "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", ], "sentence2": [ 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', @@ -46,12 +44,10 @@ def test_multimodalestimator(): "The DVD CCA appealed that decision to the U.S. Supreme Court .", "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", "The DVD-CCA then appealed to the state Supreme Court .", - "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", - "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", ], - "numerical1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a", "a", "b", "a", "a"], - "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2, 0, 1], + "numerical1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a", "a", "b"], + "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2], } train_dataset = pd.DataFrame(train_data) train_dataset, valid_dataset = train_test_split(train_dataset, @@ -64,7 +60,7 @@ def test_multimodalestimator(): automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 10, + "time_budget": 15, "task": "mm-classification", "metric": "accuracy", } @@ -93,6 +89,3 @@ def test_multimodalestimator(): print(f"Inference on validation set complete, {metric}: {score}") del automl gc.collect() - -if __name__ == "__main__": - test_multimodalestimator() \ No newline at end of file From 25c1baf285fec3f988a7b54491c7e1806f26fb8f Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 14 Apr 2022 14:59:37 -0400 Subject: [PATCH 31/50] shrink test toy data and budget --- test/nlp/test_multimodalestimator.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 9175669e0e..4b70677d07 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -1,6 +1,5 @@ from flaml import AutoML import pandas as pd -import gc import numpy as np import os import sys @@ -27,11 +26,6 @@ def test_multimodalestimator(): "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", - "Revenue in the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", - "The DVD-CCA then appealed to the state Supreme Court .", - "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", - "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", ], "sentence2": [ 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', @@ -39,15 +33,10 @@ def test_multimodalestimator(): "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", - "With the scandal hanging over Stewart 's company , revenue the first quarter of the year dropped 15 percent from the same period a year earlier .", - "The tech-laced Nasdaq Composite .IXIC rallied 30.46 points , or 2.04 percent , to 1,520.15 .", - "The DVD CCA appealed that decision to the U.S. Supreme Court .", - "The Nasdaq had a weekly gain of 17.27 , or 1.2 percent , closing at 1,520.15 on Friday .", - "The DVD-CCA then appealed to the state Supreme Court .", ], - "numerical1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "categorical1": ["a", "b", "a", "a", "a", "b", "a", "a", "a", "b"], - "label": [1, 0, 2, 0, 1, 2, 0, 1, 1, 2], + "numerical1": [1, 2, 3, 4, 5], + "categorical1": ["a", "b", "a", "b", "a", ], + "label": [1, 0, 1, 0, 1,], } train_dataset = pd.DataFrame(train_data) train_dataset, valid_dataset = train_test_split(train_dataset, @@ -63,6 +52,7 @@ def test_multimodalestimator(): "time_budget": 15, "task": "mm-classification", "metric": "accuracy", + "seed": seed, } automl_settings["ag_args"] = { @@ -87,5 +77,3 @@ def test_multimodalestimator(): print("Try to run inference on validation set") score = automl.score(valid_dataset[feature_columns], valid_dataset["label"]) print(f"Inference on validation set complete, {metric}: {score}") - del automl - gc.collect() From f9d3b22491c0f90dc7d26ba274d7d0911c76bd4c Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 14 Apr 2022 15:36:35 -0400 Subject: [PATCH 32/50] change to regression test --- test/nlp/test_multimodalestimator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 4b70677d07..deae2e2c91 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -35,8 +35,8 @@ def test_multimodalestimator(): "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", ], "numerical1": [1, 2, 3, 4, 5], - "categorical1": ["a", "b", "a", "b", "a", ], - "label": [1, 0, 1, 0, 1,], + "categorical1": ["a", "b", "a", "b", "a"], + "label": [5, 4, 3, 2, 1], } train_dataset = pd.DataFrame(train_data) train_dataset, valid_dataset = train_test_split(train_dataset, @@ -50,8 +50,8 @@ def test_multimodalestimator(): "gpu_per_trial": 0, "max_iter": 2, "time_budget": 15, - "task": "mm-classification", - "metric": "accuracy", + "task": "mm-regression", + "metric": "r2", "seed": seed, } From c1568b447ef227e2d0a9866ca77e208db536f95f Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 14 Apr 2022 16:12:15 -0400 Subject: [PATCH 33/50] add metric to kwargs for mm in train_estimator, raise test budget --- flaml/ml.py | 2 ++ test/nlp/test_multimodalestimator.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/flaml/ml.py b/flaml/ml.py index 436b41caf2..cbe7f5e0d8 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -637,6 +637,8 @@ def train_estimator( ) if isinstance(estimator, TransformersEstimator): fit_kwargs["metric"] = eval_metric + elif isinstance(estimator, MultiModalEstimator): + fit_kwargs["metric"] = eval_metric if X_train is not None: train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index deae2e2c91..dc82e696cd 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -49,7 +49,7 @@ def test_multimodalestimator(): automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 15, + "time_budget": 20, "task": "mm-regression", "metric": "r2", "seed": seed, From 1e4201d49568400865e00053b3ff15056f9e916b Mon Sep 17 00:00:00 2001 From: Varia Date: Fri, 15 Apr 2022 17:38:39 -0400 Subject: [PATCH 34/50] use valid data if any for early stopping, raise test budget --- flaml/model.py | 9 ++++++++- test/nlp/test_multimodalestimator.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/flaml/model.py b/flaml/model.py index 1f74762aa9..ba5aaa1609 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2181,7 +2181,14 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): eval_metric=kwargs["metric"], backend=self.ag_args.backend) train_data = BaseEstimator._join(X_train, y_train) - tuning_data = BaseEstimator._join(kwargs.get("X_val"), kwargs.get("y_val")) + # use valid data for early stopping + X_val = kwargs.get("X_val") + y_val = kwargs.get("y_val") + if X_val and y_val: + tuning_data = BaseEstimator._join(X_val, y_val) + else: + tuning_data = None + # NOTE: if no tuning_data, model.fit() will holdout a fraction from train_data for early stopping model.fit(train_data=train_data, tuning_data=tuning_data, hyperparameters=hyperparameters, diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index dc82e696cd..7f7dab6f52 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -49,7 +49,7 @@ def test_multimodalestimator(): automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 20, + "time_budget": 30, "task": "mm-regression", "metric": "r2", "seed": seed, From 9692d4eded7bc16a3b5fecada2a056a58606aacb Mon Sep 17 00:00:00 2001 From: Varia Date: Fri, 15 Apr 2022 19:26:41 -0400 Subject: [PATCH 35/50] return to the original budget --- test/nlp/test_multimodalestimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 7f7dab6f52..bb6518f19d 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -49,7 +49,7 @@ def test_multimodalestimator(): automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 30, + "time_budget": 50, "task": "mm-regression", "metric": "r2", "seed": seed, From 1b2cb28e6572525036e44741da8ceb430e3b520a Mon Sep 17 00:00:00 2001 From: Varia Date: Sat, 16 Apr 2022 01:23:02 -0400 Subject: [PATCH 36/50] fix valid DF checking --- flaml/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaml/model.py b/flaml/model.py index ba5aaa1609..196f8228c2 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2184,7 +2184,7 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): # use valid data for early stopping X_val = kwargs.get("X_val") y_val = kwargs.get("y_val") - if X_val and y_val: + if X_val is not None and y_val is not None: tuning_data = BaseEstimator._join(X_val, y_val) else: tuning_data = None From 05941bc059fc999012d0716f702422c4a7eb0b2b Mon Sep 17 00:00:00 2001 From: Varia Date: Mon, 18 Apr 2022 16:10:15 -0400 Subject: [PATCH 37/50] simplify isinstance in ml.py --- flaml/ml.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/flaml/ml.py b/flaml/ml.py index cbe7f5e0d8..8cd2e7e974 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -573,12 +573,7 @@ def compute_estimator( n_jobs=n_jobs, ) - if isinstance(estimator, TransformersEstimator): - fit_kwargs["metric"] = eval_metric - fit_kwargs["X_val"] = X_val - fit_kwargs["y_val"] = y_val - - elif isinstance(estimator, MultiModalEstimator): + if isinstance(estimator, (TransformersEstimator, MultiModalEstimator)): fit_kwargs["metric"] = eval_metric fit_kwargs["X_val"] = X_val fit_kwargs["y_val"] = y_val From 74f27b589adc4dd4f0f9a82e28810987b732791a Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 19 Apr 2022 12:31:55 -0400 Subject: [PATCH 38/50] reduce text column and budget --- test/nlp/test_multimodalestimator.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index bb6518f19d..7b81fe49e0 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -27,13 +27,13 @@ def test_multimodalestimator(): "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", ], - "sentence2": [ - 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', - "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", - "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", - "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", - "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", - ], + # "sentence2": [ + # 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', + # "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", + # "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", + # "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", + # "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", + # ], "numerical1": [1, 2, 3, 4, 5], "categorical1": ["a", "b", "a", "b", "a"], "label": [5, 4, 3, 2, 1], @@ -43,13 +43,13 @@ def test_multimodalestimator(): test_size=0.2, random_state=np.random.RandomState(seed)) - feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] - + # feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] + feature_columns = ["sentence1", "numerical1", "categorical1"] automl = AutoML() automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 50, + "time_budget": 20, "task": "mm-regression", "metric": "r2", "seed": seed, From c8848c76194b5b10616aaab6d5f019c7730981c5 Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 19 Apr 2022 13:21:32 -0400 Subject: [PATCH 39/50] use only 4-row toy test data --- test/nlp/test_multimodalestimator.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 7b81fe49e0..16092256c2 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -25,25 +25,16 @@ def test_multimodalestimator(): "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", - "The stock rose $ 2.11 , or about 11 percent , to close Friday at $ 21.51 on the New York Stock Exchange .", ], - # "sentence2": [ - # 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', - # "Yucaipa bought Dominick 's in 1995 for $ 693 million and sold it to Safeway for $ 1.8 billion in 1998 .", - # "On June 10 , the ship 's owners had published an advertisement on the Internet , offering the explosives for sale .", - # "Tab shares jumped 20 cents , or 4.6 % , to set a record closing high at A $ 4.57 .", - # "PG & E Corp. shares jumped $ 1.63 or 8 percent to $ 21.03 on the New York Stock Exchange on Friday .", - # ], - "numerical1": [1, 2, 3, 4, 5], - "categorical1": ["a", "b", "a", "b", "a"], - "label": [5, 4, 3, 2, 1], + "numerical1": [1, 2, 3, 4], + "categorical1": ["a", "b", "a", "b"], + "label": [5, 4, 3, 2], } train_dataset = pd.DataFrame(train_data) train_dataset, valid_dataset = train_test_split(train_dataset, test_size=0.2, random_state=np.random.RandomState(seed)) - # feature_columns = ["sentence1", "sentence2", "numerical1", "categorical1"] feature_columns = ["sentence1", "numerical1", "categorical1"] automl = AutoML() automl_settings = { From 7be2c5c6848e4f9c129c1c166d8144c22d0abcb7 Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 19 Apr 2022 14:06:40 -0400 Subject: [PATCH 40/50] test 10s budget --- test/nlp/test_multimodalestimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 16092256c2..6c08ad58b1 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -40,7 +40,7 @@ def test_multimodalestimator(): automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 20, + "time_budget": 10, "task": "mm-regression", "metric": "r2", "seed": seed, From 1c7f7ad431750816cea506b6454d39bd95e5de19 Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 19 Apr 2022 15:59:06 -0400 Subject: [PATCH 41/50] minimize test toy dataset --- test/nlp/test_multimodalestimator.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 6c08ad58b1..d6bb197891 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -18,24 +18,26 @@ def test_multimodalestimator(): return seed = 123 - metric = "accuracy" + metric = "r2" train_data = { "sentence1": [ 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', - "Yucaipa owned Dominick 's before selling the chain to Safeway in 1998 for $ 2.5 billion .", + ], + "numerical1": [1], + "label": [1], + } + + valid_data = { + "sentence1": [ "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", - "Around 0335 GMT , Tab shares were up 19 cents , or 4.4 % , at A $ 4.56 , having earlier set a record high of A $ 4.57 .", ], - "numerical1": [1, 2, 3, 4], - "categorical1": ["a", "b", "a", "b"], - "label": [5, 4, 3, 2], + "numerical1": [1], + "label": [1], } train_dataset = pd.DataFrame(train_data) - train_dataset, valid_dataset = train_test_split(train_dataset, - test_size=0.2, - random_state=np.random.RandomState(seed)) + valid_dataset = pd.DataFrame(valid_data) - feature_columns = ["sentence1", "numerical1", "categorical1"] + feature_columns = ["sentence1", "numerical1"] automl = AutoML() automl_settings = { "gpu_per_trial": 0, From be60fa6c22bb0f68e8a6b720d1a15401ceb500ed Mon Sep 17 00:00:00 2001 From: Varia Date: Tue, 19 Apr 2022 16:38:01 -0400 Subject: [PATCH 42/50] shorter test sentence --- test/nlp/test_multimodalestimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index d6bb197891..4d0a864677 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -21,7 +21,7 @@ def test_multimodalestimator(): metric = "r2" train_data = { "sentence1": [ - 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', + "Amrozi accused his brother of deliberately distorting his evidence.", ], "numerical1": [1], "label": [1], @@ -29,7 +29,7 @@ def test_multimodalestimator(): valid_data = { "sentence1": [ - "They had published an advertisement on the Internet on June 10 , offering the cargo for sale , he added .", + "They had published an advertisement on the Internet on June 10.", ], "numerical1": [1], "label": [1], From 3a29c5b276ddef0366f79dce923a2892372f6496 Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 20 Apr 2022 14:23:23 -0400 Subject: [PATCH 43/50] give enough test budget --- test/nlp/test_multimodalestimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 4d0a864677..63f7f7bc50 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -42,7 +42,7 @@ def test_multimodalestimator(): automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 10, + "time_budget": 20, "task": "mm-regression", "metric": "r2", "seed": seed, From 543b660e1afcd42cf69cf83e348951c546934ed0 Mon Sep 17 00:00:00 2001 From: Varia Date: Wed, 20 Apr 2022 14:23:23 -0400 Subject: [PATCH 44/50] give enough test budget --- flaml/automl.py | 2 ++ flaml/data.py | 4 ---- test/nlp/test_multimodalestimator.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index d8b4f35a21..c5818c72be 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -721,6 +721,8 @@ def score(self, X: pd.DataFrame, y: pd.Series, **kwargs): ) return None X = self._preprocess(X) + if self._label_transformer: + y = self._label_transformer.transform(y) return estimator.score(X, y, **kwargs) def predict( diff --git a/flaml/data.py b/flaml/data.py index e036975460..0cadead32a 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -395,13 +395,9 @@ def transform(self, X: Union[DataFrame, np.array]): Args: X: A numpy array or a pandas dataframe of training data. - y: A numpy array or a pandas series of labels. - task: A string of the task type, e.g., - 'classification', 'regression', 'ts_forecast', 'rank'. Returns: X: Processed numpy array or pandas dataframe of training data. - y: Processed numpy array or pandas series of labels. """ X = X.copy() diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 4d0a864677..63f7f7bc50 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -42,7 +42,7 @@ def test_multimodalestimator(): automl_settings = { "gpu_per_trial": 0, "max_iter": 2, - "time_budget": 10, + "time_budget": 20, "task": "mm-regression", "metric": "r2", "seed": seed, From 5bd061f354418064a8e4df746d682dafc0bc0420 Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 12 May 2022 15:02:09 -0400 Subject: [PATCH 45/50] add pytorch backend support --- flaml/model.py | 50 +++++++++++++++------- flaml/nlp/utils.py | 30 +++++++++---- test/nlp/test_multimodalestimator.py | 63 ++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 24 deletions(-) diff --git a/flaml/model.py b/flaml/model.py index 3707db4804..c7083ce84a 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2159,14 +2159,14 @@ def search_space(cls, **params): def _init_ag_args(self, automl_fit_kwargs: dict = None): from .nlp.utils import AGArgs - ag_args = AGArgs() - for key, val in automl_fit_kwargs["ag_args"].items(): - assert ( - key in ag_args.__dict__ - ), "The specified key {} is not in the argument list of flaml.nlp.utils::AGArgs".format( - key - ) - setattr(ag_args, key, val) + ag_args = AGArgs(**automl_fit_kwargs["ag_args"]) + # for key, val in automl_fit_kwargs["ag_args"].items(): + # assert ( + # key in ag_args.__dict__ + # ), "The specified key {} is not in the argument list of flaml.nlp.utils::AGArgs".format( + # key + # ) + # setattr(ag_args, key, val) self.ag_args = ag_args def fit(self, X_train=None, y_train=None, budget=None, **kwargs): @@ -2176,16 +2176,34 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): self._init_ag_args(kwargs) seed = self._kwargs.get("seed", 123) - assert (self.ag_args.backend == "mxnet"), "the pytorch automm model is not supported. " # get & set the hyperparameters, update with self.params hyperparameters = self.ag_args.hyperparameters - search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"] - for key, value in self.params.items(): - # NOTE: FLAML uses np.float64 but AG uses float, need to transform - if key == "n_jobs": - continue - else: - search_space[key] = value.item() if isinstance(value, np.float64) else value + if self.ag_args.backend == "mxnet": + search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"] + for key, value in self.params.items(): + # NOTE: FLAML uses np.float64 but AG uses float, need to transform + if key == "n_jobs": + continue + else: + search_space[key] = value.item() if isinstance(value, np.float64) else value + # elif using pytorch backend + else: + # TODO: if pytorch only, remove this mapper and modify the search space keys directly + # then AGargs in utils.py should be modify accordingly + KEY_MAPPER = { + "model.network.agg_net.mid_units": "model.fusion_mlp.hidden_sizes", + "optimization.lr": "optimization.learning_rate", + "optimization.wd": "optimization.weight_decay", + "optimization.warmup_portion": "warmup_steps", + } + for key, value in self.params.items(): + if key == "n_jobs": + continue + elif key == "model.network.agg_net.mid_units": + hyperparameters[KEY_MAPPER[key]] = [value] + else: + hyperparameters[key] = value.item() if isinstance(value, np.float64) else value + start_time = time.time() self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id) assert self._task in MM_TASKS, f"The task is not multimodal, but {self._task}. " diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 9c7b68d136..6ed52d11bc 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -491,7 +491,8 @@ class AGArgs: default="data/mm/output/", metadata={"help": "data dir", "required": True} ) backend: str = field(default="mxnet", metadata={"help": "the backend of the multimodal model"}) - text_backbone: str = field(default="electra_base", metadata={"help": "text backbone model"}) + text_backbone: str = field(default="electra_base", metadata={"help": "mxnet text backbone model"}) + hf_model_checkpoint_name: str = field(default="google/electra-base-discriminator", metadata={"help": "HF model"}) multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"}) per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"}) num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"}) @@ -504,14 +505,27 @@ def __post_init__(self): Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html """ from autogluon.text.text_prediction.legacy_presets import ag_text_presets + from autogluon.text.text_prediction.presets import get_text_preset + + if self.backend == "mxnet": + base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}' + self.hyperparameters = ag_text_presets.create(base_key) + # NOTE: set batch & epoch + search_space = self.hyperparameters["models"]["MultimodalTextModel"]["search_space"] + search_space["optimization.per_device_batch_size"] = self.per_device_batch_size + search_space["optimization.batch_size"] = self.batch_size + search_space["optimization.num_train_epochs"] = self.num_train_epochs + elif self.backend == "pytorch": + # get the override from the text preset tuple + self.hyperparameters = get_text_preset("default")[1] + + self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_checkpoint_name + self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size + self.hyperparameters["env.batch_size"] = self.batch_size + self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs - base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}' - self.hyperparameters = ag_text_presets.create(base_key) - # NOTE: set batch & epoch - search_space = self.hyperparameters["models"]["MultimodalTextModel"]["search_space"] - search_space["optimization.per_device_batch_size"] = self.per_device_batch_size - search_space["optimization.batch_size"] = self.batch_size - search_space["optimization.num_train_epochs"] = self.num_train_epochs + else: + raise ValueError(f"No {self.backend} backend, please choose mxnet or pytorch.") @staticmethod def load_args(): diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 63f7f7bc50..9f27f381e8 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -70,3 +70,66 @@ def test_multimodalestimator(): print("Try to run inference on validation set") score = automl.score(valid_dataset[feature_columns], valid_dataset["label"]) print(f"Inference on validation set complete, {metric}: {score}") + + +def test_pytorch_backend(): + if sys.version < "3.7": + # do not test on python3.6 + return + elif platform.system() == "Windows": + # do not test on windows with py3.8 + return + seed = 123 + metric = "r2" + train_data = { + "sentence1": [ + "Mary had a little lamb.", + "Its fleece was white as snow." + ], + "numerical1": [1, 2], + "label": [1, 2], + } + + valid_data = { + "sentence1": [ + "Mary had a little lamb.", + "Its fleece was white as snow." + ], + "numerical1": [1, 2], + "label": [1, 2], + } + train_dataset = pd.DataFrame(train_data) + valid_dataset = pd.DataFrame(valid_data) + + feature_columns = ["sentence1", "numerical1"] + automl = AutoML() + automl_settings = { + "gpu_per_trial": 0, + "max_iter": 2, + "time_budget": 30, + "task": "mm-regression", + "metric": "r2", + "seed": seed, + } + + automl_settings["ag_args"] = { + "output_dir": "test/ag_output/", + "backend": "pytorch", + "text_backbone": "google/electra-small-discriminator" + } + + automl.fit( + X_train=train_dataset[feature_columns], + y_train=train_dataset["label"], + X_val=valid_dataset[feature_columns], + y_val=valid_dataset["label"], + eval_method="holdout", + auto_augment=False, + **automl_settings + ) + automl.pickle("automl.pkl") + with open("automl.pkl", "rb") as f: + automl = pickle.load(f) + print("Try to run inference on validation set") + score = automl.score(valid_dataset[feature_columns], valid_dataset["label"]) + print(f"Inference on validation set complete, {metric}: {score}") From 2b150e71e04d06758bfe877749bf195006a982aa Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 19 May 2022 12:07:55 -0400 Subject: [PATCH 46/50] set pytorch backend to default --- flaml/nlp/utils.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 6ed52d11bc..c75051b146 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -490,7 +490,7 @@ class AGArgs: output_dir: str = field( default="data/mm/output/", metadata={"help": "data dir", "required": True} ) - backend: str = field(default="mxnet", metadata={"help": "the backend of the multimodal model"}) + backend: str = field(default="pytorch", metadata={"help": "the backend of the multimodal model"}) text_backbone: str = field(default="electra_base", metadata={"help": "mxnet text backbone model"}) hf_model_checkpoint_name: str = field(default="google/electra-base-discriminator", metadata={"help": "HF model"}) multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"}) @@ -504,10 +504,20 @@ def __post_init__(self): Get the preset using the AGArgs. Save as self.hyperparameters. Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html """ - from autogluon.text.text_prediction.legacy_presets import ag_text_presets - from autogluon.text.text_prediction.presets import get_text_preset + if self.backend == "pytorch": + from autogluon.text.text_prediction.presets import get_text_preset + + # get the override from the text preset tuple + self.hyperparameters = get_text_preset("default")[1] + + self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_checkpoint_name + self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size + self.hyperparameters["env.batch_size"] = self.batch_size + self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs + + elif self.backend == "mxnet": + from autogluon.text.text_prediction.legacy_presets import ag_text_presets - if self.backend == "mxnet": base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}' self.hyperparameters = ag_text_presets.create(base_key) # NOTE: set batch & epoch @@ -515,14 +525,6 @@ def __post_init__(self): search_space["optimization.per_device_batch_size"] = self.per_device_batch_size search_space["optimization.batch_size"] = self.batch_size search_space["optimization.num_train_epochs"] = self.num_train_epochs - elif self.backend == "pytorch": - # get the override from the text preset tuple - self.hyperparameters = get_text_preset("default")[1] - - self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_checkpoint_name - self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size - self.hyperparameters["env.batch_size"] = self.batch_size - self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs else: raise ValueError(f"No {self.backend} backend, please choose mxnet or pytorch.") From 505c89412d186ecd89038152cad1fa014b31fddb Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 19 May 2022 15:15:24 -0400 Subject: [PATCH 47/50] pytorch backend support only --- flaml/model.py | 60 ++++++---------------- flaml/nlp/utils.py | 46 +++++------------ setup.py | 1 - test/nlp/test_multimodalestimator.py | 77 ++-------------------------- 4 files changed, 31 insertions(+), 153 deletions(-) diff --git a/flaml/model.py b/flaml/model.py index c7083ce84a..c1ce3e95c4 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2137,72 +2137,42 @@ def search_space(cls, **params): https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values """ search_space_dict = { - "model.network.agg_net.mid_units": { + "model.fusion_mlp.hidden_sizes": { "domain": tune.choice(list(range(32, 129))), "init_value": 128, }, - "optimization.lr": { + "optimization.learning_rate": { "domain": tune.loguniform(lower=1E-5, upper=1E-4), "init_value": 1E-4, }, - "optimization.wd": { + "optimization.weight_decay": { "domain": tune.choice([1E-4, 1E-3, 1E-2]), "init_value": 1E-4, }, - "optimization.warmup_portion": { + "optimization.warmup_steps": { "domain": tune.choice([0.1, 0.2]), "init_value": 0.1, }, } return search_space_dict - def _init_ag_args(self, automl_fit_kwargs: dict = None): - from .nlp.utils import AGArgs - - ag_args = AGArgs(**automl_fit_kwargs["ag_args"]) - # for key, val in automl_fit_kwargs["ag_args"].items(): - # assert ( - # key in ag_args.__dict__ - # ), "The specified key {} is not in the argument list of flaml.nlp.utils::AGArgs".format( - # key - # ) - # setattr(ag_args, key, val) - self.ag_args = ag_args - def fit(self, X_train=None, y_train=None, budget=None, **kwargs): from autogluon.text import TextPredictor + from .nlp.utils import AGArgs self._kwargs = kwargs - self._init_ag_args(kwargs) + self.ag_args = AGArgs(**kwargs["ag_args"]) seed = self._kwargs.get("seed", 123) # get & set the hyperparameters, update with self.params hyperparameters = self.ag_args.hyperparameters - if self.ag_args.backend == "mxnet": - search_space = hyperparameters["models"]["MultimodalTextModel"]["search_space"] - for key, value in self.params.items(): - # NOTE: FLAML uses np.float64 but AG uses float, need to transform - if key == "n_jobs": - continue - else: - search_space[key] = value.item() if isinstance(value, np.float64) else value - # elif using pytorch backend - else: - # TODO: if pytorch only, remove this mapper and modify the search space keys directly - # then AGargs in utils.py should be modify accordingly - KEY_MAPPER = { - "model.network.agg_net.mid_units": "model.fusion_mlp.hidden_sizes", - "optimization.lr": "optimization.learning_rate", - "optimization.wd": "optimization.weight_decay", - "optimization.warmup_portion": "warmup_steps", - } - for key, value in self.params.items(): - if key == "n_jobs": - continue - elif key == "model.network.agg_net.mid_units": - hyperparameters[KEY_MAPPER[key]] = [value] - else: - hyperparameters[key] = value.item() if isinstance(value, np.float64) else value + for key, value in self.params.items(): + if key == "n_jobs": + continue + elif key == "model.fusion_mlp.hidden_sizes": + hyperparameters[key] = [value] + else: + hyperparameters[key] = value.item() if isinstance(value, np.float64) else value start_time = time.time() self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id) @@ -2211,7 +2181,7 @@ def fit(self, X_train=None, y_train=None, budget=None, **kwargs): label="label", problem_type=self._task[3:], eval_metric=kwargs["metric"], - backend=self.ag_args.backend, + backend="pytorch", verbosity=0) train_data = BaseEstimator._join(X_train, y_train) # use valid data for early stopping @@ -2253,7 +2223,7 @@ def predict_proba(self, X): def score(self, X_val: DataFrame, y_val: Series, **kwargs): from autogluon.text import TextPredictor - model = TextPredictor.load(path=self.model_path, backend=self.ag_args.backend) + model = TextPredictor.load(path=self.model_path, backend="pytorch") val_data = BaseEstimator._join(X_val, y_val) return model.evaluate(val_data) diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index c75051b146..ad77f71034 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -483,17 +483,13 @@ class AGArgs: The Autogluon configurations Args: output_dir (str): data root directory for outputing the log and intermediate data, model. - backend (str, optional, defaults to "mxnet"): currently only support to mxnet. - text_backbone (str, optional, defaults to "electra_base"): the text backbone model. - multimodal_fusion_strategy (str, optional, defaults to "fuse_late"): the fuse strategy. + hf_model_checkpoint_name (str, optional, defaults to "google/electra-base-discriminator"): the HF model checkpoint. + per_device_batch_size (int, optional, defaults to 8) + num_train_epochs (int, optional, defaults to 10) + batch_size (int, optional, defaults to 128) """ - output_dir: str = field( - default="data/mm/output/", metadata={"help": "data dir", "required": True} - ) - backend: str = field(default="pytorch", metadata={"help": "the backend of the multimodal model"}) - text_backbone: str = field(default="electra_base", metadata={"help": "mxnet text backbone model"}) - hf_model_checkpoint_name: str = field(default="google/electra-base-discriminator", metadata={"help": "HF model"}) - multimodal_fusion_strategy: str = field(default="fuse_late", metadata={"help": "fusion strategy"}) + output_dir: str = field(default="data/mm_output/", metadata={"help": "data dir", "required": True}) + hf_model_path: str = field(default="google/electra-base-discriminator", metadata={"help": "Hugging Face model path"}) per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"}) num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"}) batch_size: int = field(default=128, metadata={"help": "batch size"}) @@ -502,32 +498,16 @@ class AGArgs: def __post_init__(self): """ Get the preset using the AGArgs. Save as self.hyperparameters. - Ref: https://auto.gluon.ai/0.3.1/tutorials/text_prediction/customization.html """ - if self.backend == "pytorch": - from autogluon.text.text_prediction.presets import get_text_preset - - # get the override from the text preset tuple - self.hyperparameters = get_text_preset("default")[1] - - self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_checkpoint_name - self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size - self.hyperparameters["env.batch_size"] = self.batch_size - self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs + from autogluon.text.text_prediction.presets import get_text_preset - elif self.backend == "mxnet": - from autogluon.text.text_prediction.legacy_presets import ag_text_presets - - base_key = f'{self.text_backbone}_{self.multimodal_fusion_strategy}' - self.hyperparameters = ag_text_presets.create(base_key) - # NOTE: set batch & epoch - search_space = self.hyperparameters["models"]["MultimodalTextModel"]["search_space"] - search_space["optimization.per_device_batch_size"] = self.per_device_batch_size - search_space["optimization.batch_size"] = self.batch_size - search_space["optimization.num_train_epochs"] = self.num_train_epochs + # get the override from the text preset tuple + self.hyperparameters = get_text_preset("default")[1] - else: - raise ValueError(f"No {self.backend} backend, please choose mxnet or pytorch.") + self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_path + self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size + self.hyperparameters["env.batch_size"] = self.batch_size + self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs @staticmethod def load_args(): diff --git a/setup.py b/setup.py index 24a622b01f..73a2ef2abb 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,6 @@ "seqeval", ], "autogluon": [ - "mxnet<2.0.0", "autogluon.text==0.4.0", "autogluon.features==0.4.0", ], diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 9f27f381e8..4a0d14f454 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -10,77 +10,6 @@ def test_multimodalestimator(): - if sys.version < "3.7": - # do not test on python3.6 - return - elif platform.system() == "Windows": - # do not test on windows with py3.8 - return - - seed = 123 - metric = "r2" - train_data = { - "sentence1": [ - "Amrozi accused his brother of deliberately distorting his evidence.", - ], - "numerical1": [1], - "label": [1], - } - - valid_data = { - "sentence1": [ - "They had published an advertisement on the Internet on June 10.", - ], - "numerical1": [1], - "label": [1], - } - train_dataset = pd.DataFrame(train_data) - valid_dataset = pd.DataFrame(valid_data) - - feature_columns = ["sentence1", "numerical1"] - automl = AutoML() - automl_settings = { - "gpu_per_trial": 0, - "max_iter": 2, - "time_budget": 20, - "task": "mm-regression", - "metric": "r2", - "seed": seed, - } - - automl_settings["ag_args"] = { - "output_dir": "test/ag_output/", - "backend": "mxnet", - "text_backbone": "electra_small", - "multimodal_fusion_strategy": "fuse_late", - } - - automl.fit( - X_train=train_dataset[feature_columns], - y_train=train_dataset["label"], - X_val=valid_dataset[feature_columns], - y_val=valid_dataset["label"], - eval_method="holdout", - auto_augment=False, - **automl_settings - ) - automl.pickle("automl.pkl") - with open("automl.pkl", "rb") as f: - automl = pickle.load(f) - print("Try to run inference on validation set") - score = automl.score(valid_dataset[feature_columns], valid_dataset["label"]) - print(f"Inference on validation set complete, {metric}: {score}") - - -def test_pytorch_backend(): - if sys.version < "3.7": - # do not test on python3.6 - return - elif platform.system() == "Windows": - # do not test on windows with py3.8 - return - seed = 123 - metric = "r2" train_data = { "sentence1": [ "Mary had a little lamb.", @@ -102,6 +31,7 @@ def test_pytorch_backend(): valid_dataset = pd.DataFrame(valid_data) feature_columns = ["sentence1", "numerical1"] + metric = "r2" automl = AutoML() automl_settings = { "gpu_per_trial": 0, @@ -109,13 +39,12 @@ def test_pytorch_backend(): "time_budget": 30, "task": "mm-regression", "metric": "r2", - "seed": seed, + "seed": 123, } automl_settings["ag_args"] = { "output_dir": "test/ag_output/", - "backend": "pytorch", - "text_backbone": "google/electra-small-discriminator" + "hf_model_path": "google/electra-small-discriminator" } automl.fit( From 98ee13855d421e4453c7f97db0c1953300efac01 Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 19 May 2022 16:11:16 -0400 Subject: [PATCH 48/50] test remove os and python ver constraints --- .github/workflows/python-package.yml | 3 +-- flaml/automl.py | 4 ++-- flaml/data.py | 6 +++--- flaml/model.py | 4 ++-- flaml/nlp/utils.py | 4 ++-- test/nlp/test_multimodalestimator.py | 2 +- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 3179338ffc..a1dd082fad 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -47,8 +47,7 @@ jobs: run: | pip install -e .[ray,forecast] pip install 'tensorboardX<=2.2' - - name: If python version > 3.6 and not on windows, install autogluon - if: matrix.python-version >= '3.7' && (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') + - name: Install autogluon run: | pip install -e .[autogluon] - name: Lint with flake8 diff --git a/flaml/automl.py b/flaml/automl.py index 8a6864e8f3..b641b189cb 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -1692,7 +1692,7 @@ def _decide_split_type(self, split_type): len(np.unique(self._y_train_all)) ) elif self._state.task == "mm-classification": - self._state.task = "mm-" + get_classification_objective( + self._state.task = "mm-" + get_classification_objective( len(np.unique(self._y_train_all)) ) if not isinstance(split_type, str): @@ -2457,7 +2457,7 @@ def is_to_reverse_metric(metric, task): estimator_list = ["transformer"] # NOTE: if multimodal task, use multimodal estimator elif self._state.task in MM_TASKS: - estimator_list=["multimodal"] + estimator_list = ["multimodal"] else: try: import catboost diff --git a/flaml/data.py b/flaml/data.py index 0cadead32a..6b451e805e 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -50,9 +50,9 @@ TOKENCLASSIFICATION, ) MM_TASKS = ( - "mm-classification", - "mm-regression", - "mm-binary", + "mm-classification", + "mm-regression", + "mm-binary", "mm-multiclass",) diff --git a/flaml/model.py b/flaml/model.py index 08a4be47e4..9ee4f5dafb 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -2167,7 +2167,7 @@ def search_space(cls, **params): }, "optimization.warmup_steps": { "domain": tune.choice([0.1, 0.2]), - "init_value": 0.1, + "init_value": 0.1, }, } return search_space_dict @@ -2242,7 +2242,7 @@ def score(self, X_val: DataFrame, y_val: Series, **kwargs): model = TextPredictor.load(path=self.model_path, backend="pytorch") val_data = BaseEstimator._join(X_val, y_val) return model.evaluate(val_data) - + class suppress_stdout_stderr(object): def __init__(self): diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 87dfa5a930..427f91fe5a 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -494,7 +494,7 @@ class AGArgs: hf_model_path: str = field(default="google/electra-base-discriminator", metadata={"help": "Hugging Face model path"}) per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"}) num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"}) - batch_size: int = field(default=128, metadata={"help": "batch size"}) + batch_size: int = field(default=128, metadata={"help": "batch size"}) hyperparameters: dict = field(init=False) def __post_init__(self): @@ -502,7 +502,7 @@ def __post_init__(self): Get the preset using the AGArgs. Save as self.hyperparameters. """ from autogluon.text.text_prediction.presets import get_text_preset - + # get the override from the text preset tuple self.hyperparameters = get_text_preset("default")[1] diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index 4a0d14f454..f50f63cf22 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -29,7 +29,7 @@ def test_multimodalestimator(): } train_dataset = pd.DataFrame(train_data) valid_dataset = pd.DataFrame(valid_data) - + feature_columns = ["sentence1", "numerical1"] metric = "r2" automl = AutoML() From ff8c078f2f595a271b2094cc15343310f17e94df Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 19 May 2022 16:29:26 -0400 Subject: [PATCH 49/50] no support for python 3.6 --- .github/workflows/python-package.yml | 3 ++- test/nlp/test_multimodalestimator.py | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index a1dd082fad..3404d1824b 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -47,7 +47,8 @@ jobs: run: | pip install -e .[ray,forecast] pip install 'tensorboardX<=2.2' - - name: Install autogluon + - name: If python version > 3.6, install autogluon + if: matrix.python-version >= '3.7' run: | pip install -e .[autogluon] - name: Lint with flake8 diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index f50f63cf22..f8888d2576 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -10,6 +10,9 @@ def test_multimodalestimator(): + if sys.version < "3.7": + # do not test on python3.6 + return train_data = { "sentence1": [ "Mary had a little lamb.", From 24a5333dd21ffebdb9877ab429921866754b4444 Mon Sep 17 00:00:00 2001 From: Varia Date: Thu, 19 May 2022 16:59:44 -0400 Subject: [PATCH 50/50] no support for python 3.6 or windows --- .github/workflows/python-package.yml | 4 ++-- test/nlp/test_multimodalestimator.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 3404d1824b..3179338ffc 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -47,8 +47,8 @@ jobs: run: | pip install -e .[ray,forecast] pip install 'tensorboardX<=2.2' - - name: If python version > 3.6, install autogluon - if: matrix.python-version >= '3.7' + - name: If python version > 3.6 and not on windows, install autogluon + if: matrix.python-version >= '3.7' && (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') run: | pip install -e .[autogluon] - name: Lint with flake8 diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py index f8888d2576..b80bf41138 100644 --- a/test/nlp/test_multimodalestimator.py +++ b/test/nlp/test_multimodalestimator.py @@ -13,6 +13,9 @@ def test_multimodalestimator(): if sys.version < "3.7": # do not test on python3.6 return + elif platform.system() == "Windows": + # do not test on windows + return train_data = { "sentence1": [ "Mary had a little lamb.",