Source code for causallib.estimation.rlearner

"""
(C) Copyright 2021 IBM Corp.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Created on April 4, 2021
"""
import warnings
import pandas as pd
import numpy as np
from sklearn.utils.multiclass import type_of_target

from .base_estimator import IndividualOutcomeEstimator

from ..utils.crossfit import cross_fitting
from ..utils import general_tools as g_tools


[docs]class VotingEstimator: def __init__(self, estimators): self.estimators = estimators def _stack_predictions(self, X): """Collect results from clf.predict_proba or ref.predict calls.""" if hasattr(self.estimators[0], "predict_proba"): predictions = [est.predict_proba(X)[:, 1] for est in self.estimators] else: predictions = [est.predict(X) for est in self.estimators] predictions = np.stack(predictions, axis=-1) return predictions
[docs] def predict(self, X): """Aggregate results of different estimators""" predicted_target = self._stack_predictions(X) averaged_target = np.average(predicted_target, axis=-1) return pd.Series(averaged_target, index=X.index)
[docs]class RLearner(IndividualOutcomeEstimator): """ Given the measured outcome Y, the assignment A, and the coefficients X calculate an R-learner estimator of the effect of the treatment Let e(X) be the estimated propensity score and m(X) is the estimated outcome (E[Y|X]) by an estimator, then the R-learner minimize the following: ||Y - m(X) - (A-e(X))\tau(X)||^2_2 + lambda (\tau) where \tau(X) is a conditional average treatment effect and lambda is a regularize coefficient. If the effect_model is Linear, than minimizing squared loss with the target variable (Y-m(X)) and the features (A-e(X))X, otherwise it corresponds to a weighted regression problem, where the weights are (A-e(X))**2. This can be used with any scikit-learn regressor that accepts sample weights References: Nie, X., & Wager, S.(2017). Quasi - oracle estimation of heterogeneous treatment effects https://arxiv.org/abs/1712.04912 Chernozhukov, V., et al. (2018). Double/debiased machine learning for treatment and structural parameters.‏ https://academic.oup.com/ectj/article/21/1/C1/5056401 """ def __init__( self, effect_model, outcome_model, treatment_model, outcome_covariates=None, treatment_covariates=None, effect_covariates=None, n_splits=5, refit=True, caliper=1e-6, non_parametric=False, ): """ Args: effect_model: An sklearn model that estimate that estimate the conditional average treatment effect \tau(X) outcome_model: An sklearn model that estimate the regressor Y|X (without the treatment). Note: it is recommended to use a regressor, even for binary outcome. treatment_model: An sklearn model that estimate the treatment model or the probability to be treated, i.e A|X or P(A=1|X) outcome_covariates (array): Covariates to use for the outcome model. If None - all covariates passed will be used. Either list of column names or boolean mask. treatment_covariates (array): Covariates to use for treatment model. If None - all covariates passed will be used. Either list of column names or boolean mask. effect_covariates (array): Covariates to use for the effect model. If None - all covariates passed will be used. Either list of column names or boolean mask. n_splits (int): number of sample-splitting in the cross-fitting procedure refit (bool): if True - Nuisance models are fitted over the whole training set, otherwise Nuisance models are fitted per folds non_parametric(bool): if True - the effect_model is estimated as weighted regression task, otherwise the effect_model is considered linear. """ super(RLearner, self).__init__(lambda **x: None) # Dummy initialization delattr(self, "learner") # To remove the learner attribute a # IndividualOutcomeEstimator has self.effect_model = effect_model self.outcome_model = outcome_model self.treatment_model = treatment_model self.outcome_covariates = outcome_covariates self.treatment_covariates = treatment_covariates self.effect_covariates = effect_covariates self.n_splits = n_splits self.refit = refit self.caliper = caliper self.non_parametric = non_parametric def __repr__(self): repr_string = ( "{cls_name}(\n " "outcome_model={outcome},\n " "treatment_model={treatment},\n " "effect_model={effect})" ).format( cls_name=self.__class__.__name__, outcome=self.outcome_model, treatment=self.treatment_model, effect=self.effect_model, ) return repr_string def _extract_outcome_model_data(self, X): outcome_covariates = self.outcome_covariates or X.columns X_outcome = X[outcome_covariates] return X_outcome def _extract_treatment_model_data(self, X): treatment_covariates = self.treatment_covariates or X.columns X_treatment = X[treatment_covariates] return X_treatment def _extract_effect_model_data(self, X): X_effect = pd.Series(data=1, index=X.index, name="intercept_").to_frame() if not isinstance(self.effect_covariates, list) or self.effect_covariates: effect_covariates = self.effect_covariates or X.columns X_effect = pd.concat([X_effect, X[effect_covariates]], axis="columns") return X_effect def _prepare_data(self, X): """ Extract the relevant parts for outcome model, treatment model and effect model for the entire data matrix Args: X (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). Returns: (pd.DataFrame, pd.DataFrame, pd.DataFrame): X_outcome, X_treatment, X_effect Data matrix for outcome model, treatment model and data effect model """ X_outcome = self._extract_outcome_model_data(X) X_treatment = self._extract_treatment_model_data(X) X_effect = self._extract_effect_model_data(X) return X_outcome, X_treatment, X_effect def _fit_and_predict_model(self, model, X, target, predict_proba): """ fit outcome model (E[Y|X]) or treatment model (E[A|X] or P[A=1|X]) and produce predictions on held-out data subsets Args: model (object): Treatment or outcome model X (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). target (pd.Series): Observed target (outcome or treatment) of size (num_subjects,). predict_proba (bool): if True predict probabilities with predict_proba, otherwise use predict Returns: pd.Series: cross-fitted prediction of the outcome """ target_pred, estimators = cross_fitting( model, X, target, n_splits=self.n_splits, predict_proba=predict_proba ) target_pred = pd.Series(data=target_pred, index=X.index) if self.refit: estimators = [model.fit(X, target)] return target_pred, estimators def _fit_and_predict_nuisance(self, X_outcome, X_treatment, a, y): """ fit the nuisance models and return residuals Args: X_outcome (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). X_treatment (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). a (pd.Series): Treatment assignment of size (num_subjects,). y (pd.Series): Observed outcome of size (num_subjects,). Returns: pd.Series, pd.Series: residuals of the outcome model, residuals of the treatment model """ # residuals of the outcome model pred_y, outcome_model_ = self._fit_and_predict_model( self.outcome_model, X_outcome, y, predict_proba=False ) res_y = y - pred_y self.outcome_model_ = VotingEstimator(outcome_model_) # residuals of the treatment model pred_a, treatment_model_ = self._fit_and_predict_model( self.treatment_model, X_treatment, a, predict_proba=self.binary_treatment_ ) res_a = a - pred_a self.treatment_model_ = VotingEstimator(treatment_model_) return res_y, res_a
[docs] def estimate_individual_effect(self, X): """ Predict the individual treatment effect Args: X (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). Returns: pd.Series: The series is a vector in size (num_subjects) that contains the estimated treatment effect, each row is an individual """ X_effect = self._extract_effect_model_data(X) return pd.Series(self.effect_model.predict(X_effect), index=X.index)
[docs] def estimate_individual_outcome(self, X, a, treatment_values=None, predict_proba=False): """ Estimating corrected individual counterfactual outcomes. Args: X (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). a (pd.Series): Treatment assignment of size (num_subjects,). treatment_values (Any): Desired treatment value/s to use when estimating the counterfactual outcome. If not supplied, calculates for all available treatment values. predict_proba: IGNORED. Not used, present for API consistency by convention. Returns: pd.DataFrame: DataFrame which columns are treatment values and rows are individuals: each column is a vector size (num_samples,) that contains the estimated outcome for each individual under the treatment value in the corresponding key. """ if (not self.binary_treatment_) & (treatment_values is None): raise ValueError( "The individual_outcome cannot be represented " "for a continuous treatment effect.\n Choose " 'treatment in "treatment_values" ' ) X_outcome = self._extract_outcome_model_data(X) X_treatment = self._extract_treatment_model_data(X) outcome_pred = self.outcome_model_.predict(X_outcome) treatment_pred = self.treatment_model_.predict(X_treatment) effect_pred = self.estimate_individual_effect(X) treatment_values = g_tools.get_iterable_treatment_values(treatment_values, a) individual_cf = { treatment_value: (treatment_value - treatment_pred) * effect_pred + outcome_pred for treatment_value in treatment_values } return pd.DataFrame(individual_cf)
def _fit_linear_effect_model(self, X, res_a, res_y): """ fit a linear effect model without fitting an intercept Args: X (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). res_a (pd.Series): residuals of the treatment model res_y (pd.Series): residuals of the outcome model """ if hasattr(self.effect_model, "fit_intercept"): if self.effect_model.fit_intercept: self.effect_model.fit_intercept = False warnings.warn( "The effect model forces intercept estimation as an " "additional coefficient. Therefore, the explicit " "`fit_intercept` attribute was set to False." ) else: warnings.warn( "`non_parametric=False` was passed and `effect_model` " "does not have a `fit_intercept` attribute and therefore " "does not seem to be scikit-learn LinearModel. " "Ignore this warning if you are sure `effect_model` " "is a linear model." ) # multiply each row by a scalar X_tilde = X * np.array(res_a)[:, np.newaxis] self.effect_model.fit(X_tilde, res_y) def _fit_non_parametric_effect_model(self, X, res_a, res_y, caliper): """ fit a non-parametric effect model as a weighted regression. Args: X (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). res_a (pd.Series): residuals of the treatment model res_y (pd.Series): residuals of the outcome model caliper (None | float): minimal value of treatment-probability residual """ if hasattr(self.effect_model, "fit_intercept"): warnings.warn( "`non_parametric=True` was passed, but `effect_model` has a " "`fit_intercept` attributes and therefore seems to be a" "scikit-learn LinearModel. Ignore this warning if you're sure" "`effect_model` is a nonparametric model" ) caliper_ = caliper or self.caliper if caliper_ is not None: cond_clipping = np.abs(res_a) < caliper_ res_a.loc[cond_clipping] = caliper_ * np.sign(res_a.loc[cond_clipping]) self.effect_model.fit(X, (res_y / res_a), sample_weight=res_a ** 2)
[docs] def fit(self, X, a, y, caliper=None): """ Args: X (pd.DataFrame): Covariate matrix of size (num_subjects, num_features). a (pd.Series): Treatment assignment of size (num_subjects,). y (pd.Series): Observed outcome of size (num_subjects,). caliper (None | float): minimal value of treatment-probability residual. used to avoid division by zero when fitting the effect-model. If None - no clipping is done. The caliper is irrelevant if the effect_model is Linear. """ self.binary_treatment_ = type_of_target(a) == "binary" X_outcome, X_treatment, X_effect = self._prepare_data(X) res_y, res_a = self._fit_and_predict_nuisance(X_outcome, X_treatment, a, y) if self.non_parametric: self._fit_non_parametric_effect_model(X_effect, res_a, res_y, caliper) else: self._fit_linear_effect_model(X_effect, res_a, res_y) return self