"""Predictions from single folds.
Predictions are generated by predictors for causal models. They contain the estimates
for single folds and are combined in the EvaluationResults objects for further analysis.
"""
from collections import namedtuple
from typing import Union
import warnings
import pandas as pd
from ..utils.stat_utils import robust_lookup
from .metrics import evaluate_metrics
from ..metrics.weight_metrics import calculate_covariate_balance
# Result bundle built by `PropensityPredictions.evaluate_metrics`: the first
# field carries the evaluated prediction metrics, the second the covariate
# balance table.
PropensityEvaluatorScores = namedtuple(
    "PropensityEvaluatorScores",
    ("prediction_scores", "covariate_balance"),
)
class WeightPredictions:
    """Container for the predictions of a weight model on a single fold."""

    def __init__(
        self,
        weight_by_treatment_assignment,
        weight_for_being_treated,
    ):
        # Both inputs are stored as given; nothing is validated or copied here.
        self.weight_for_being_treated = weight_for_being_treated
        self.weight_by_treatment_assignment = weight_by_treatment_assignment

    def evaluate_metrics(self, X, a_true, metrics_to_evaluate):
        """Assess how well the model's weights balance the covariates.

        Args:
            X (pd.DataFrame): Covariates.
            a_true (pd.Series): Ground-truth treatment assignment.
            metrics_to_evaluate (dict | None): IGNORED. Accepted only so the
                signature matches the other prediction classes in this module.

        Returns:
            pd.DataFrame: a `covariate_balance` dataframe, as produced by
                `calculate_covariate_balance` using
                `self.weight_by_treatment_assignment` as the weights.
        """
        return calculate_covariate_balance(
            X, a_true, self.weight_by_treatment_assignment
        )
class PropensityPredictions(WeightPredictions):
    """Data structure to hold propensity-model predictions"""

    def __init__(
        self,
        weight_by_treatment_assignment,
        weight_for_being_treated,
        treatment_assignment_pred,
        propensity,
        propensity_by_treatment_assignment,
    ):
        super().__init__(
            weight_by_treatment_assignment,
            weight_for_being_treated,
        )
        self.treatment_assignment_pred = treatment_assignment_pred
        self.propensity = propensity
        self.propensity_by_treatment_assignment = propensity_by_treatment_assignment

    def evaluate_metrics(self, X, a_true, metrics_to_evaluate):
        """Evaluate prediction metrics and covariate balance on this fold.

        Args:
            X (pd.DataFrame): Covariates.
            a_true (pd.Series): ground truth treatment assignment
            metrics_to_evaluate (dict | None): key: metric's name, value: callable that receives
                true labels, prediction and sample_weights (the latter may be ignored).

        Returns:
            PropensityEvaluatorScores: namedtuple with two fields:
                "prediction_scores" (a single-row DataFrame of the evaluated
                metrics) and "covariate_balance".
        """
        y_pred_proba, y_pred = self._get_predictions_to_evaluate()
        evaluated_metrics = evaluate_metrics(
            metrics_to_evaluate=metrics_to_evaluate,
            y_true=a_true,
            y_pred_proba=y_pred_proba,
            y_pred=y_pred,
        )
        # Convert single-dtype Series to a row in a DataFrame:
        evaluated_metrics_df = pd.DataFrame(evaluated_metrics).T
        # change dtype of each column to numerical if possible:
        evaluated_metrics_df = evaluated_metrics_df.apply(
            self._to_numeric_if_possible
        )
        covariate_balance = calculate_covariate_balance(
            X, a_true, self.weight_by_treatment_assignment
        )
        # TODO: rename to PropensityEvaluatorScorers ?
        return PropensityEvaluatorScores(evaluated_metrics_df, covariate_balance)

    @staticmethod
    def _to_numeric_if_possible(column):
        """Return `column` converted to a numeric dtype, or unchanged if it cannot be.

        Replaces `pd.to_numeric(..., errors="ignore")`, which is deprecated
        since pandas 2.2 in favour of catching the exception explicitly.
        """
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    def _get_predictions_to_evaluate(self):
        """Return the `(y_pred_proba, y_pred)` pair scored by `evaluate_metrics`."""
        y_pred_proba = self.propensity
        y_pred = self.treatment_assignment_pred
        return y_pred_proba, y_pred
class OutcomePredictions:
    """Data structure to hold outcome-model predictions"""

    def __init__(self, prediction, prediction_event_prob=None):
        """Store the predictions and, when usable, the event probabilities.

        Args:
            prediction (pd.DataFrame): outcome predictions; indexed below by
                treatment value as column label.
            prediction_event_prob (pd.DataFrame | None): output of the
                probability-estimating prediction, if one was made. It is
                reduced to the event-probability columns, or to None when no
                usable binary probabilities exist
                (see `_correct_predict_proba_estimate`).
        """
        self.prediction = prediction
        self.prediction_event_prob = self._correct_predict_proba_estimate(
            prediction, prediction_event_prob
        )
        self.is_binary_outcome = self.prediction_event_prob is not None

    @staticmethod
    def _correct_predict_proba_estimate(prediction, prediction_event_prob):
        """Extract the event probabilities, or None if they are not applicable.

        Returns None when no probabilities were supplied, when they have the
        same columns as `prediction`, or when the "y" column level does not
        have exactly two values (a warning is issued in that last case).
        """
        # No probability estimate was supplied at all (the constructor default).
        if prediction_event_prob is None:
            return None
        # Estimation output for predict_proba=True has same columns as for predict_proba=False.
        # This means either base-learner has no predict_proba/decision_function
        # or problem is not classification.
        # Either way, it means there are no prediction probabilities
        if prediction_event_prob.columns.tolist() == prediction.columns.tolist():
            return None
        # predict_proba=True was able to predict probabilities. However,
        # Prediction probability evaluation is only applicable for binary outcome:
        y_values = prediction_event_prob.columns.get_level_values("y").unique()
        # Note: on pandas 0.23.0 you could do prediction_event_prob.columns.unique(level='y')
        if y_values.size == 2:
            # get the maximal value, assumes binary 0-1 (1: event, 0: non-event)
            event_value = y_values.max()
            # Extract the probability for event:
            return prediction_event_prob.xs(key=event_value, axis="columns", level="y")
        warnings.warn(
            "Multiclass probabilities are not well defined and supported for evaluation.\n"
            "Falling back to class predictions.\n"
            "Plots might be uninformative due to input being classes and not probabilities."
        )
        return None

    @staticmethod
    def _to_numeric_if_possible(column):
        """Return `column` converted to a numeric dtype, or unchanged if it cannot be.

        Replaces `pd.to_numeric(..., errors="ignore")`, which is deprecated
        since pandas 2.2 in favour of catching the exception explicitly.
        """
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    def _use_event_probabilities(self, y_true):
        """Tell whether probability-based scoring applies to `y_true`.

        Requires both a binary ground truth and stored event probabilities;
        the latter are None when the learner produced no usable probabilities.
        """
        return self.prediction_event_prob is not None and y_true.nunique() == 2

    def evaluate_metrics(self, a, y, metrics_to_evaluate):
        """Evaluate metrics for this model prediction.

        Metrics are computed once over all samples (row "actual") and once per
        treatment value found in `a` (rows labelled `str(treatment_value)`).

        Args:
            a (pd.Series): treatment assignment
            y (pd.Series): ground truth outcomes
            metrics_to_evaluate (Dict[str,Callable]): key: metric's name, value: callable that
                receives true labels, prediction and sample_weights (the latter may be ignored).
                If not provided, defaults from causallib.evaluation.metrics are used.

        Returns:
            pd.DataFrame: evaluated metrics
        """
        scores = {"actual": self._evaluate_metrics_overall(a, y, metrics_to_evaluate)}
        scores.update(
            {
                str(t): self._evaluate_metrics_on_treatment_value(
                    a, y, metrics_to_evaluate, t
                )
                for t in sorted(set(a))
            }
        )
        scores = pd.concat(scores, names=["model_strata"], axis="columns").T
        # Change dtype of each column to numerical if possible:
        scores = scores.apply(self._to_numeric_if_possible)
        return scores

    def _evaluate_metrics_on_treatment_value(
        self, a_true, y_true, metrics_to_evaluate, treatment_value
    ):
        """Evaluate metrics on the samples whose treatment is `treatment_value`."""
        # Binary-ness is judged on the full `y_true`, not on the stratum.
        use_proba = self._use_event_probabilities(y_true)
        # Stratify based on treatment assignment:
        treatment_value_idx = a_true == treatment_value
        y_true_strata = y_true.loc[treatment_value_idx]
        prediction_strata = self.prediction.loc[treatment_value_idx, treatment_value]
        if use_proba:
            prediction_prob_strata = self.prediction_event_prob.loc[
                treatment_value_idx, treatment_value
            ]
        else:
            prediction_prob_strata = None
        evaluated_metrics = evaluate_metrics(
            metrics_to_evaluate=metrics_to_evaluate,
            y_true=y_true_strata,
            y_pred=prediction_strata,
            y_pred_proba=prediction_prob_strata,
        )
        return evaluated_metrics

    def _evaluate_metrics_overall(self, a_true, y_true, metrics_to_evaluate):
        """Evaluate metrics over all samples, using each sample's actual treatment."""
        # Score overall:
        # # Extract prediction on actual treatment
        # NOTE(review): `robust_lookup` presumably picks, per row, the column
        # matching that row's treatment value - confirm against stat_utils.
        prediction_strata = robust_lookup(self.prediction, a_true)
        if self._use_event_probabilities(y_true):
            prediction_prob_strata = robust_lookup(self.prediction_event_prob, a_true)
        else:
            prediction_prob_strata = None
        evaluated_metrics = evaluate_metrics(
            metrics_to_evaluate=metrics_to_evaluate,
            y_true=y_true,
            y_pred=prediction_strata,
            y_pred_proba=prediction_prob_strata,
        )
        return evaluated_metrics

    def get_prediction_by_treatment(self, a: pd.Series):
        """Get proba if available else prediction"""
        if self.is_binary_outcome:
            pred = self.prediction_event_prob
        else:
            pred = self.prediction
        return robust_lookup(pred, a[pred.index])

    def get_proba_by_treatment(self, a: pd.Series):
        """Get proba of prediction"""
        # NOTE(review): no guard for `prediction_event_prob` being None here;
        # callers presumably check `is_binary_outcome` first - confirm.
        return robust_lookup(self.prediction_event_prob, a[self.prediction.index])
# Type alias for any of the per-fold prediction containers defined above.
SingleFoldPrediction = Union[PropensityPredictions, WeightPredictions, OutcomePredictions]