Commit dec7a15: support option to not fit_target_curves

csinva committed Mar 13, 2024
1 parent 05ccf62 commit dec7a15
Showing 2 changed files with 22 additions and 4 deletions.
imodels/algebraic/gam_multitask.py (20 changes: 18 additions, 2 deletions)
@@ -44,7 +44,9 @@ def __init__(
         linear_penalty='ridge',
         onehot_prior=False,
         renormalize_features=False,
+        use_normalize_feature_targets=False,
         use_internal_classifiers=False,
+        fit_target_curves=True,
         random_state=42,
     ):
         """
@@ -56,17 +58,25 @@ def __init__(
             features predicting the target should have coef 1
         renormalize_features: bool
             If True, renormalize the features before fitting the linear model
+        use_normalize_feature_targets: bool
+            whether to normalize the features used as targets for internal EBMs
+            (does not apply to target columns)
+            If input features are normalized already, this has no effect
         use_internal_classifiers: bool
             whether to use internal classifiers (as opposed to regressors)
+        fit_target_curves: bool
+            whether to fit an EBM to predict the target
         """
         self.ebm_kwargs = ebm_kwargs
         self.multitask = multitask
         self.linear_penalty = linear_penalty
         self.random_state = random_state
         self.interactions = interactions
         self.onehot_prior = onehot_prior
+        self.use_normalize_feature_targets = use_normalize_feature_targets
         self.renormalize_features = renormalize_features
         self.use_internal_classifiers = use_internal_classifiers
+        self.fit_target_curves = fit_target_curves

         # override ebm_kwargs
         ebm_kwargs['random_state'] = random_state
@@ -75,6 +85,9 @@ def __init__(
     def fit(self, X, y, sample_weight=None):
         X, y = check_X_y(X, y, accept_sparse=False, multi_output=True)
         self.n_outputs_ = 1 if len(y.shape) == 1 else y.shape[1]
+        if self.n_outputs_ > 1 and not self.fit_target_curves:
+            raise ValueError(
+                "fit_target_curves must be True when n_outputs > 1")
         if isinstance(self, ClassifierMixin):
             check_classification_targets(y)
         if self.n_outputs_ == 1:
@@ -121,11 +134,14 @@ def fit(self, X, y, sample_weight=None):
                 self.ebms_.append(self._initialize_ebm_internal(y_))
                 if isinstance(self, ClassifierMixin):
                     _, y_ = np.unique(y_, return_inverse=True)
+                elif self.use_normalize_feature_targets:
+                    y_ = StandardScaler().fit_transform(y_.reshape(-1, 1)).ravel()
                 self.ebms_[task_num].fit(X_, y_, sample_weight=sample_weight)

             # also fit an EBM to the target
-            self.ebms_.append(self._initialize_ebm_internal(y))
-            self.ebms_[num_features].fit(X, y, sample_weight=sample_weight)
+            if self.fit_target_curves:
+                self.ebms_.append(self._initialize_ebm_internal(y))
+                self.ebms_[num_features].fit(X, y, sample_weight=sample_weight)
         elif self.n_outputs_ > 1:
             # with multiple outputs, we fit an EBM to each output
             for task_num in tqdm(range(self.n_outputs_)):
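Note: the new elif branch above standardizes a feature column before it is used as the target of an internal EBM. A minimal sketch of that transform in isolation (the toy array y_ is illustrative, not from the repo):

import numpy as np
from sklearn.preprocessing import StandardScaler

# a feature column that will serve as an internal-EBM target
y_ = np.array([3.0, 5.0, 9.0])
# same call as in the elif branch: rescale to zero mean, unit variance
y_ = StandardScaler().fit_transform(y_.reshape(-1, 1)).ravel()
# per the docstring, this has no effect if the input features
# were already normalized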
tests/gam_multitask_test.py (6 changes: 4 additions, 2 deletions)
@@ -162,13 +162,15 @@ def compare_models():

     results = defaultdict(list)
     for gam in tqdm([
+        MultiTaskGAMRegressor(),
+        MultiTaskGAMRegressor(fit_target_curves=False),
         # AdaBoostRegressor(estimator=MultiTaskGAMRegressor(
         #     multitask=True), n_estimators=2),
         # MultiTaskGAMRegressor(multitask=True, onehot_prior=True),
         # MultiTaskGAMRegressor(multitask=True, onehot_prior=False),
         # MultiTaskGAMRegressor(multitask=True, renormalize_features=True),
-        MultiTaskGAMRegressor(multitask=True, renormalize_features=False),
-        MultiTaskGAMRegressor(multitask=True, use_internal_classifiers=True),
+        # MultiTaskGAMRegressor(multitask=True, renormalize_features=False),
+        # MultiTaskGAMRegressor(multitask=True, use_internal_classifiers=True),
         # ExplainableBoostingRegressor(n_jobs=1, interactions=0)
     ]):
         np.random.seed(42)
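For context, a hedged usage sketch of the new option (assumes MultiTaskGAMRegressor is importable from imodels, as in this test file; the data is synthetic):

import numpy as np
from imodels import MultiTaskGAMRegressor  # import path assumed

X = np.random.randn(100, 3)
y = X[:, 0] + 0.1 * np.random.randn(100)

# skip fitting the extra EBM on the target itself
gam = MultiTaskGAMRegressor(multitask=True, fit_target_curves=False)
gam.fit(X, y)
preds = gam.predict(X)

# with multi-output y, fit_target_curves=False now raises
# ValueError: fit_target_curves must be True when n_outputs > 1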
