diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cebff3fbe1..3179338ffc 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -47,6 +47,10 @@ jobs: run: | pip install -e .[ray,forecast] pip install 'tensorboardX<=2.2' + - name: If python version > 3.6 and not on windows, install autogluon + if: matrix.python-version != '3.6' && (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') + run: | + pip install -e .[autogluon] - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names diff --git a/flaml/automl.py b/flaml/automl.py index e3ebaa40ac..db18c84f41 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -47,6 +47,7 @@ REGRESSION, _is_nlp_task, NLG_TASKS, + MM_TASKS, ) from . import tune from .training_log import training_log_reader, training_log_writer @@ -1690,6 +1691,10 @@ def _decide_split_type(self, split_type): self._state.task = get_classification_objective( len(np.unique(self._y_train_all)) ) + elif self._state.task == "mm-classification": + self._state.task = "mm-" + get_classification_objective( + len(np.unique(self._y_train_all)) + ) if not isinstance(split_type, str): assert hasattr(split_type, "split") and hasattr( split_type, "get_n_splits" ) @@ -2452,6 +2457,9 @@ def is_to_reverse_metric(metric, task): estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"] elif _is_nlp_task(self._state.task): estimator_list = ["transformer"] + # NOTE: if multimodal task, use multimodal estimator + elif self._state.task in MM_TASKS: + estimator_list = ["multimodal"] else: try: import catboost diff --git a/flaml/data.py b/flaml/data.py index 149cd8983c..6b451e805e 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -23,9 +23,12 @@ SEQCLASSIFICATION, MULTICHOICECLASSIFICATION, TOKENCLASSIFICATION, + "mm-binary", + "mm-multiclass", + "mm-classification", ) SEQREGRESSION = "seq-regression" -REGRESSION = ("regression", SEQREGRESSION) 
+REGRESSION = ("regression", SEQREGRESSION, "mm-regression") TS_FORECASTREGRESSION = ( "forecast", "ts_forecast", @@ -46,6 +49,11 @@ MULTICHOICECLASSIFICATION, TOKENCLASSIFICATION, ) +MM_TASKS = ( + "mm-classification", + "mm-regression", + "mm-binary", + "mm-multiclass",) def _is_nlp_task(task): @@ -245,7 +253,6 @@ def concat(X1, X2): class DataTransformer: """Transform input training data.""" - def fit_transform(self, X: Union[DataFrame, np.array], y, task): """Fit transformer and process the input training data according to the task type. @@ -269,6 +276,10 @@ def fit_transform(self, X: Union[DataFrame, np.array], y, task): if len(str_columns) > 0: X[str_columns] = X[str_columns].astype("string") self._str_columns = str_columns + # NOTE: if multimodal task, no preprocessing on X + elif task in MM_TASKS: + # no-op by design: AutoGluon's TextPredictor consumes the raw columns as-is + pass elif isinstance(X, DataFrame): X = X.copy() n = X.shape[0] @@ -395,6 +406,9 @@ def transform(self, X: Union[DataFrame, np.array]): # ids (input ids, token type id, attention mask, etc.) 
if len(self._str_columns) > 0: X[self._str_columns] = X[self._str_columns].astype("string") + elif self._task in MM_TASKS: + # no-op by design: multimodal columns are passed to the estimator unchanged + pass elif isinstance(X, DataFrame): cat_columns, num_columns, datetime_columns = ( self._cat_columns, diff --git a/flaml/ml.py b/flaml/ml.py index 092a02565e..384fcd1d5b 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -37,6 +37,7 @@ ARIMA, SARIMAX, TransformersEstimator, + MultiModalEstimator, TransformersEstimatorModelSelection, ) from .data import CLASSIFICATION, group_counts, TS_FORECAST, TS_VALUE_COL @@ -122,6 +123,8 @@ def get_estimator_class(task, estimator_name): estimator_class = SARIMAX elif estimator_name == "transformer": estimator_class = TransformersEstimator + elif estimator_name == "multimodal": + estimator_class = MultiModalEstimator elif estimator_name == "transformer_ms": estimator_class = TransformersEstimatorModelSelection else: @@ -584,7 +587,7 @@ def compute_estimator( n_jobs=n_jobs, ) - if isinstance(estimator, TransformersEstimator): + if isinstance(estimator, (TransformersEstimator, MultiModalEstimator)): fit_kwargs["metric"] = eval_metric fit_kwargs["X_val"] = X_val fit_kwargs["y_val"] = y_val @@ -650,6 +653,8 @@ def train_estimator( ) if isinstance(estimator, TransformersEstimator): fit_kwargs["metric"] = eval_metric + elif isinstance(estimator, MultiModalEstimator): + fit_kwargs["metric"] = eval_metric if X_train is not None: train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs) diff --git a/flaml/model.py b/flaml/model.py index 78d423f4c1..9ee4f5dafb 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -32,6 +32,7 @@ SUMMARIZATION, NLG_TASKS, MULTICHOICECLASSIFICATION, + MM_TASKS ) try: @@ -133,6 +134,13 @@ def estimator(self): def _preprocess(self, X): return X + @staticmethod + def _join(X_train, y_train): + y_train = DataFrame(y_train, index=X_train.index) + y_train.columns = ["label"] + train_df = X_train.join(y_train) + return train_df + + def 
_fit(self, X_train, y_train, **kwargs): current_time = time.time() @@ -2127,6 +2135,115 @@ class XGBoostLimitDepth_TS(TS_SKLearn): base_class = XGBoostLimitDepthEstimator +class MultiModalEstimator(BaseEstimator): + """ + The class for tuning AutoGluon TextPredictor + """ + def __init__(self, task="binary", **config): + super().__init__(task, **config) + import uuid + + self.trial_id = str(uuid.uuid1().hex)[:8] + + @classmethod + def search_space(cls, **params): + """ + Add the possible search space configs here, e.g. 'optimization.lr' + reference: + https://auto.gluon.ai/stable/tutorials/text_prediction/customization.html#custom-hyperparameter-values + """ + search_space_dict = { + "model.fusion_mlp.hidden_sizes": { + "domain": tune.choice(list(range(32, 129))), + "init_value": 128, + }, + "optimization.learning_rate": { + "domain": tune.loguniform(lower=1E-5, upper=1E-4), + "init_value": 1E-4, + }, + "optimization.weight_decay": { + "domain": tune.choice([1E-4, 1E-3, 1E-2]), + "init_value": 1E-4, + }, + "optimization.warmup_steps": { + "domain": tune.choice([0.1, 0.2]), + "init_value": 0.1, + }, + } + return search_space_dict + + def fit(self, X_train=None, y_train=None, budget=None, **kwargs): + from autogluon.text import TextPredictor + from .nlp.utils import AGArgs + + self._kwargs = kwargs + self.ag_args = AGArgs(**kwargs["ag_args"]) + seed = self._kwargs.get("seed", 123) + + # get & set the hyperparameters, update with self.params + hyperparameters = self.ag_args.hyperparameters + for key, value in self.params.items(): + if key == "n_jobs": + continue + elif key == "model.fusion_mlp.hidden_sizes": + hyperparameters[key] = [value] + else: + hyperparameters[key] = value.item() if isinstance(value, np.float64) else value + + start_time = time.time() + self.model_path = os.path.join(self.ag_args.output_dir, self.trial_id) + assert self._task in MM_TASKS, f"The task is not multimodal, but {self._task}. 
" + model = TextPredictor(path=self.model_path, + label="label", + problem_type=self._task[3:], + eval_metric=kwargs["metric"], + backend="pytorch", + verbosity=0) + train_data = BaseEstimator._join(X_train, y_train) + # use valid data for early stopping + X_val = kwargs.get("X_val") + y_val = kwargs.get("y_val") + if X_val is not None and y_val is not None: + tuning_data = BaseEstimator._join(X_val, y_val) + else: + tuning_data = None + # NOTE: if no tuning_data, model.fit() will holdout a fraction from train_data for early stopping + model.fit(train_data=train_data, + tuning_data=tuning_data, + hyperparameters=hyperparameters, + num_gpus=kwargs.get("gpu_per_trial", None), + time_limit=budget, + seed=seed) + + training_time = time.time() - start_time + return training_time + + def predict(self, X): + from autogluon.text import TextPredictor + + model = TextPredictor.load(path=self.model_path, backend="pytorch") + output = model.predict(X, as_pandas=False) + return output + + def predict_proba(self, X): + from autogluon.text import TextPredictor + + # only works for classification tasks + assert ( + self._task in CLASSIFICATION + ), "predict_proba() only for classification tasks." 
+        model = TextPredictor.load(path=self.model_path, backend="pytorch") +        output = model.predict_proba(X, as_pandas=False) +        return output + +    def score(self, X_val: DataFrame, y_val: Series, **kwargs): +        from autogluon.text import TextPredictor + +        model = TextPredictor.load(path=self.model_path, backend="pytorch") +        val_data = BaseEstimator._join(X_val, y_val) +        return model.evaluate(val_data) + + class suppress_stdout_stderr(object): def __init__(self): # Open a pair of null files diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index cd2e7a409e..427f91fe5a 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -1,3 +1,5 @@ +import argparse +from dataclasses import dataclass, field from itertools import chain from typing import Dict, Any import numpy as np @@ -475,3 +477,57 @@ def _set_model_config(checkpoint_path): model_config = _set_model_config(checkpoint_path) this_model = get_this_model(checkpoint_path, task, model_config) return this_model + + +@dataclass +class AGArgs: +    """ +    The Autogluon configurations +    Args: +        output_dir (str): data root directory for outputting the logs, intermediate data, and models. +        hf_model_path (str, optional, defaults to "google/electra-base-discriminator"): the HF model checkpoint. 
+        per_device_batch_size (int, optional, defaults to 8) +        num_train_epochs (int, optional, defaults to 10) +        batch_size (int, optional, defaults to 128) +    """ +    output_dir: str = field(default="data/mm_output/", metadata={"help": "data dir", "required": True}) +    hf_model_path: str = field(default="google/electra-base-discriminator", metadata={"help": "Hugging Face model path"}) +    per_device_batch_size: int = field(default=8, metadata={"help": "per device batch size"}) +    num_train_epochs: int = field(default=10, metadata={"help": "number of train epochs"}) +    batch_size: int = field(default=128, metadata={"help": "batch size"}) +    hyperparameters: dict = field(init=False) + +    def __post_init__(self): +        """ +        Get the preset using the AGArgs. Save as self.hyperparameters. +        """ +        from autogluon.text.text_prediction.presets import get_text_preset + +        # get the override from the text preset tuple +        self.hyperparameters = get_text_preset("default")[1] + +        self.hyperparameters["model.hf_text.checkpoint_name"] = self.hf_model_path +        self.hyperparameters["env.per_gpu_batch_size"] = self.per_device_batch_size +        self.hyperparameters["env.batch_size"] = self.batch_size +        self.hyperparameters["optimization.max_epochs"] = self.num_train_epochs + +    @staticmethod +    def load_args(): +        from dataclasses import fields + +        arg_parser = argparse.ArgumentParser() +        for each_field in (f for f in fields(AGArgs) if f.init):  # skip init=False fields (no metadata/default for CLI) +            arg_parser.add_argument( +                "--" + each_field.name, +                type=each_field.type, +                help=each_field.metadata["help"], +                required=each_field.metadata["required"] +                if "required" in each_field.metadata +                else False, +                choices=each_field.metadata["choices"] +                if "choices" in each_field.metadata +                else None, +                default=each_field.default, +            ) +        console_args, unknown = arg_parser.parse_known_args() +        return console_args diff --git a/setup.py b/setup.py index 907f1fe50f..73a2ef2abb 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,10 @@ "hcrystalball==0.1.10", "seqeval", ], +        "autogluon": [ +            
"autogluon.text==0.4.0", +            "autogluon.features==0.4.0", +        ], "catboost": ["catboost>=0.26"], "blendsearch": ["optuna==2.8.0"], "ray": [ diff --git a/test/nlp/test_multimodalestimator.py b/test/nlp/test_multimodalestimator.py new file mode 100644 index 0000000000..b80bf41138 --- /dev/null +++ b/test/nlp/test_multimodalestimator.py @@ -0,0 +1,70 @@ +from flaml import AutoML +import pandas as pd +import numpy as np +import os +import sys +import platform +import pickle +from sklearn.model_selection import train_test_split +os.environ["AUTOGLUON_TEXT_TRAIN_WITHOUT_GPU"] = "1" + + +def test_multimodalestimator(): +    if sys.version_info < (3, 7): +        # do not test on python3.6 +        return +    elif platform.system() == "Windows": +        # do not test on windows +        return +    train_data = { +        "sentence1": [ +            "Mary had a little lamb.", +            "Its fleece was white as snow." +        ], +        "numerical1": [1, 2], +        "label": [1, 2], +    } + +    valid_data = { +        "sentence1": [ +            "Mary had a little lamb.", +            "Its fleece was white as snow." 
+ ], + "numerical1": [1, 2], + "label": [1, 2], + } + train_dataset = pd.DataFrame(train_data) + valid_dataset = pd.DataFrame(valid_data) + + feature_columns = ["sentence1", "numerical1"] + metric = "r2" + automl = AutoML() + automl_settings = { + "gpu_per_trial": 0, + "max_iter": 2, + "time_budget": 30, + "task": "mm-regression", + "metric": "r2", + "seed": 123, + } + + automl_settings["ag_args"] = { + "output_dir": "test/ag_output/", + "hf_model_path": "google/electra-small-discriminator" + } + + automl.fit( + X_train=train_dataset[feature_columns], + y_train=train_dataset["label"], + X_val=valid_dataset[feature_columns], + y_val=valid_dataset["label"], + eval_method="holdout", + auto_augment=False, + **automl_settings + ) + automl.pickle("automl.pkl") + with open("automl.pkl", "rb") as f: + automl = pickle.load(f) + print("Try to run inference on validation set") + score = automl.score(valid_dataset[feature_columns], valid_dataset["label"]) + print(f"Inference on validation set complete, {metric}: {score}")