Source code for causallib.evaluation.predictions

"""Predictions from single folds.

Predictions are generated by predictors for causal models. They contain the estimates
for single folds and are combined into EvaluationResults objects for further analysis.
"""
from collections import namedtuple
from typing import Union
import warnings

import pandas as pd
from ..utils.stat_utils import robust_lookup
from .metrics import evaluate_metrics
from ..metrics.weight_metrics import calculate_covariate_balance


PropensityEvaluatorScores = namedtuple(
    "PropensityEvaluatorScores", ["prediction_scores", "covariate_balance"]
)


class WeightPredictions:
    """Data structure to hold weight-model predictions"""

    def __init__(
        self,
        weight_by_treatment_assignment,
        weight_for_being_treated,
    ):
        self.weight_by_treatment_assignment = weight_by_treatment_assignment
        self.weight_for_being_treated = weight_for_being_treated

    def evaluate_metrics(self, X, a_true, metrics_to_evaluate):
        """Evaluate covariate balancing of the weight model.

        Args:
            X (pd.DataFrame): Covariates.
            a_true (pd.Series): Ground truth treatment assignment.
            metrics_to_evaluate (dict | None): IGNORED.

        Returns:
            pd.DataFrame: a `covariate_balance` dataframe.
        """
        covariate_balance = calculate_covariate_balance(
            X, a_true, self.weight_by_treatment_assignment
        )
        # results = PropensityEvaluatorScores(None, covariate_balance)
        return covariate_balance
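
# Illustrative usage sketch (comments only, not part of the library): how a
# WeightPredictions object could be evaluated on synthetic data. In practice the
# object is constructed internally by the weight-model predictor; the covariates,
# treatment assignment, and weights below are made up for demonstration.
#
# >>> import numpy as np
# >>> X = pd.DataFrame(np.random.rand(100, 3), columns=["x1", "x2", "x3"])
# >>> a = pd.Series(np.random.binomial(1, 0.5, size=100))
# >>> w = pd.Series(np.ones(100))  # e.g., inverse-propensity weights
# >>> preds = WeightPredictions(
# ...     weight_by_treatment_assignment=w,
# ...     weight_for_being_treated=w,
# ... )
# >>> balance = preds.evaluate_metrics(X, a, metrics_to_evaluate=None)
# >>> # `balance` is the covariate-balance DataFrame returned by
# >>> # calculate_covariate_balance; `metrics_to_evaluate` is ignored here.
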

class PropensityPredictions(WeightPredictions):
    """Data structure to hold propensity-model predictions"""

    def __init__(
        self,
        weight_by_treatment_assignment,
        weight_for_being_treated,
        treatment_assignment_pred,
        propensity,
        propensity_by_treatment_assignment,
    ):
        super().__init__(
            weight_by_treatment_assignment,
            weight_for_being_treated,
        )
        self.treatment_assignment_pred = treatment_assignment_pred
        self.propensity = propensity
        self.propensity_by_treatment_assignment = propensity_by_treatment_assignment

    def evaluate_metrics(self, X, a_true, metrics_to_evaluate):
        """Evaluate metrics on prediction.

        Args:
            X (pd.DataFrame): Covariates.
            a_true (pd.Series): Ground truth treatment assignment.
            metrics_to_evaluate (dict | None): key: metric's name, value: callable that
                receives true labels, prediction, and sample_weights
                (the latter may be ignored).

        Returns:
            PropensityEvaluatorScores: Object with two data attributes:
                "prediction_scores" and "covariate_balance".
        """
        y_pred_proba, y_pred = self._get_predictions_to_evaluate()
        evaluated_metrics = evaluate_metrics(
            metrics_to_evaluate=metrics_to_evaluate,
            y_true=a_true,
            y_pred_proba=y_pred_proba,
            y_pred=y_pred,
        )
        # Convert single-dtype Series to a row in a DataFrame:
        evaluated_metrics_df = pd.DataFrame(evaluated_metrics).T
        # Change the dtype of each column to numerical if possible:
        evaluated_metrics_df = evaluated_metrics_df.apply(
            pd.to_numeric, errors="ignore"
        )
        covariate_balance = calculate_covariate_balance(
            X, a_true, self.weight_by_treatment_assignment
        )
        # TODO: rename to PropensityEvaluatorScorers?
        results = PropensityEvaluatorScores(evaluated_metrics_df, covariate_balance)
        return results

    def _get_predictions_to_evaluate(self):
        y_pred_proba = self.propensity
        y_pred = self.treatment_assignment_pred
        return y_pred_proba, y_pred
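
# Illustrative usage sketch (comments only, not part of the library): a
# PropensityPredictions object adds the predicted treatment assignment and the
# propensity scores on top of the weights, and its evaluate_metrics() returns a
# PropensityEvaluatorScores namedtuple. The data below is synthetic and for
# demonstration only.
#
# >>> import numpy as np
# >>> X = pd.DataFrame(np.random.rand(100, 3), columns=["x1", "x2", "x3"])
# >>> a = pd.Series(np.random.binomial(1, 0.5, size=100))
# >>> propensity = pd.Series(np.random.uniform(0.1, 0.9, size=100))
# >>> preds = PropensityPredictions(
# ...     weight_by_treatment_assignment=pd.Series(np.ones(100)),
# ...     weight_for_being_treated=1 / propensity,
# ...     treatment_assignment_pred=(propensity > 0.5).astype(int),
# ...     propensity=propensity,
# ...     propensity_by_treatment_assignment=propensity,
# ... )
# >>> scores = preds.evaluate_metrics(X, a, metrics_to_evaluate=None)
# >>> scores.prediction_scores   # DataFrame of prediction metrics
# >>> scores.covariate_balance   # covariate-balance DataFrame
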

class OutcomePredictions:
    """Data structure to hold outcome-model predictions"""

    def __init__(self, prediction, prediction_event_prob=None):
        self.prediction = prediction
        self.prediction_event_prob = self._correct_predict_proba_estimate(
            prediction, prediction_event_prob
        )
        self.is_binary_outcome = self.prediction_event_prob is not None

    @staticmethod
    def _correct_predict_proba_estimate(prediction, prediction_event_prob):
        if prediction_event_prob is None:
            # No probability estimates were provided at all:
            return None
        # Estimation output for predict_proba=True has the same columns as for
        # predict_proba=False. This means either the base-learner has no
        # predict_proba/decision_function or the problem is not classification.
        # Either way, it means there are no prediction probabilities.
        if prediction_event_prob.columns.tolist() == prediction.columns.tolist():
            return None
        # predict_proba=True was able to predict probabilities. However,
        # prediction-probability evaluation is only applicable for a binary outcome:
        y_values = prediction_event_prob.columns.get_level_values("y").unique()
        # Note: on pandas 0.23.0 you could do prediction_event_prob.columns.unique(level='y')
        if y_values.size == 2:
            # Get the maximal value, assuming binary 0-1 encoding (1: event, 0: non-event)
            event_value = y_values.max()
            # Extract the probability for the event:
            return prediction_event_prob.xs(key=event_value, axis="columns", level="y")
        warnings.warn(
            "Multiclass probabilities are not well defined and supported for evaluation.\n"
            "Falling back to class predictions.\n"
            "Plots might be uninformative due to input being classes and not probabilities."
        )
        return None

    def evaluate_metrics(self, a, y, metrics_to_evaluate):
        """Evaluate metrics for this model prediction.

        Args:
            a (pd.Series): Treatment assignment.
            y (pd.Series): Ground truth outcomes.
            metrics_to_evaluate (Dict[str, Callable]): key: metric's name, value: callable
                that receives true labels, prediction, and sample_weights
                (the latter may be ignored).
                If not provided, defaults from causallib.evaluation.metrics are used.

        Returns:
            pd.DataFrame: evaluated metrics
        """
        scores = {"actual": self._evaluate_metrics_overall(a, y, metrics_to_evaluate)}
        scores.update(
            {
                str(t): self._evaluate_metrics_on_treatment_value(
                    a, y, metrics_to_evaluate, t
                )
                for t in sorted(set(a))
            }
        )
        scores = pd.concat(scores, names=["model_strata"], axis="columns").T
        scores = scores.apply(pd.to_numeric, errors="ignore")
        return scores

    def _evaluate_metrics_on_treatment_value(
        self, a_true, y_true, metrics_to_evaluate, treatment_value
    ):
        # Stratify based on treatment assignment:
        y_is_binary = y_true.nunique() == 2
        treatment_value_idx = a_true == treatment_value
        y_true_strata = y_true.loc[treatment_value_idx]
        prediction_strata = self.prediction.loc[treatment_value_idx, treatment_value]
        if y_is_binary:
            prediction_prob_strata = self.prediction_event_prob.loc[
                treatment_value_idx, treatment_value
            ]
        else:
            prediction_prob_strata = None
        evaluated_metrics = evaluate_metrics(
            metrics_to_evaluate=metrics_to_evaluate,
            y_true=y_true_strata,
            y_pred=prediction_strata,
            y_pred_proba=prediction_prob_strata,
        )
        return evaluated_metrics

    def _evaluate_metrics_overall(self, a_true, y_true, metrics_to_evaluate):
        # Score overall: extract the prediction on the actual treatment
        y_is_binary = y_true.nunique() == 2
        prediction_strata = robust_lookup(self.prediction, a_true)
        if y_is_binary:
            prediction_prob_strata = robust_lookup(self.prediction_event_prob, a_true)
        else:
            prediction_prob_strata = None
        evaluated_metrics = evaluate_metrics(
            metrics_to_evaluate=metrics_to_evaluate,
            y_true=y_true,
            y_pred=prediction_strata,
            y_pred_proba=prediction_prob_strata,
        )
        return evaluated_metrics

    def get_prediction_by_treatment(self, a: pd.Series):
        """Get proba if available, else prediction."""
        if self.is_binary_outcome:
            pred = self.prediction_event_prob
        else:
            pred = self.prediction
        return robust_lookup(pred, a[pred.index])

    def get_proba_by_treatment(self, a: pd.Series):
        """Get the probability prediction corresponding to the actual treatment assignment."""
        return robust_lookup(self.prediction_event_prob, a[self.prediction.index])
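
# Illustrative usage sketch (comments only, not part of the library): as implied by
# the lookups above, `prediction` is expected to be a DataFrame with one column per
# treatment value (counterfactual predictions), and `prediction_event_prob` carries
# a column MultiIndex that includes a "y" level with the outcome classes. The data
# below is synthetic and for demonstration only.
#
# >>> import numpy as np
# >>> n = 100
# >>> a = pd.Series(np.random.binomial(1, 0.5, size=n))
# >>> y = pd.Series(np.random.binomial(1, 0.3, size=n))
# >>> prediction = pd.DataFrame(
# ...     np.random.binomial(1, 0.3, size=(n, 2)), columns=[0, 1]
# ... )
# >>> proba = pd.DataFrame(
# ...     np.random.rand(n, 4),
# ...     columns=pd.MultiIndex.from_product([[0, 1], [0, 1]], names=["a", "y"]),
# ... )
# >>> preds = OutcomePredictions(prediction, prediction_event_prob=proba)
# >>> preds.is_binary_outcome  # the "y" level has two classes, so probabilities are kept
# True
# >>> table = preds.evaluate_metrics(a, y, metrics_to_evaluate=None)
# >>> # `table` holds one row per stratum: "actual", "0", and "1".
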

SingleFoldPrediction = Union[
    PropensityPredictions, WeightPredictions, OutcomePredictions
]
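
# Illustrative sketch (not part of the library): `SingleFoldPrediction` can serve as a
# type hint for helpers that accept any of the three prediction containers. The
# function name below is hypothetical.
#
# >>> def is_outcome_prediction(pred: SingleFoldPrediction) -> bool:
# ...     return isinstance(pred, OutcomePredictions)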