-
Notifications
You must be signed in to change notification settings - Fork 5
Open
Description
After adding new features, we have some issues to be solved:
(1) Multiple casts to float for the numeric variables:
# Feature specification: each key is a feature name, each value its FeatureType.
features_specs = {
    # --- numerical features ---
    "feat1": FeatureType.FLOAT_NORMALIZED,
    "feat2": FeatureType.FLOAT_RESCALED,
    # --- categorical features ---
    "feat3": FeatureType.STRING_CATEGORICAL,
    "feat4": FeatureType.INTEGER_CATEGORICAL,
    # --- text features ---
    "feat5": FeatureType.TEXT,
}

ppr = PreprocessingModel(
    path_data="data/my_data.csv",
    features_specs=features_specs,
    overwrite_stats=True,
    use_distribution_aware=True,  # enable distribution-aware encoding
    # distribution_aware_bins=1000,  # optional: number of bins for finer data encoding
)

# Build the preprocessing pipelines for the features declared above.
ppr.build_preprocessor()
ppr.plot_model("model_architecture.png")
gives:
(2) Feature selection layer (needs to be added to Text features and Date features):
# Feature specification mixing the supported declaration styles:
# plain FeatureType members, feature objects with custom attributes,
# and a bare string type name.
features_specs2 = {
    # --- numerical features ---
    # declared through FeatureType
    "feat1": FeatureType.FLOAT_NORMALIZED,
    "feat2": FeatureType.FLOAT_RESCALED,
    # declared through NumericalFeature with custom attributes
    "feat3": NumericalFeature(
        name="feat3",
        feature_type=FeatureType.FLOAT_DISCRETIZED,
        bin_boundaries=[0.0, 1.0, 2.0],
    ),
    "feat4": NumericalFeature(
        name="feat4",
        feature_type=FeatureType.FLOAT,
    ),
    # declared directly by string name
    "feat5": "float",
    # --- categorical features ---
    # declared through FeatureType
    "feat6": FeatureType.STRING_CATEGORICAL,
    # declared through CategoricalFeature with custom attributes
    "feat7": CategoricalFeature(
        name="feat7",
        feature_type=FeatureType.INTEGER_CATEGORICAL,
        embedding_size=100,
    ),
    # --- text features ---
    "feat8": TextFeature(
        name="feat8",
        max_tokens=100,
        stop_words=["stop", "next"],
    ),
    # --- date features ---
    "feat10": DateFeature(
        name="feat10",
        feature_type=FeatureType.DATE,
        date_format="%Y-%m-%d",
        output_format="year",
    ),
    # --- custom pipeline ---
    "feat9": Feature(
        name="feat9",
        feature_type=FeatureType.FLOAT_NORMALIZED,
        preprocessors=[
            tf.keras.layers.Rescaling,
            tf.keras.layers.Normalization,
        ],
        # kwargs required by the layers
        scale=1,
    ),
}

ppr = PreprocessingModel(
    path_data="data/my_data.csv",
    features_specs=features_specs2,
    overwrite_stats=True,
    # feature selection settings
    feature_selection_placement="all_features",  # choose between (all_features|numeric|categorical)
    feature_selection_units=32,
    feature_selection_dropout=0.15,
)
# construct the preprocessing pipelines
ppr.build_preprocessor()
gives:
Metadata
Metadata
Assignees
Labels
No labels
