"""Functions that calculate curve data for cross validation plots."""
from typing import List
import warnings
import numpy as np
import pandas as pd
from sklearn import metrics
def calculate_roc_curve(curve_data):
    """Relabel generic per-fold curve data with ROC metric names.

    Args:
        curve_data (dict): dict of curves produced by
            BaseEvaluationPlotDataExtractor.calculate_curve_data.
            Every value must be a dict holding the generic keys
            "first_ret_value", "second_ret_value" and "area".

    Returns:
        dict[str, dict[str, list[np.ndarray]]]: The same `curve_data` object
            (it is modified in place), where in each curve the generic keys
            were renamed to "FPR", "TPR" and "AUC" (ROC metrics).
            The values are left untouched: a list the size of the number of
            folds with the evaluation of each fold.

    Raises:
        KeyError: If a curve is missing one of the generic keys.
    """
    for curve in curve_data.values():
        # pop + assign renames the key without copying the per-fold values.
        curve["FPR"] = curve.pop("first_ret_value")
        curve["TPR"] = curve.pop("second_ret_value")
        curve["AUC"] = curve.pop("area")
    return curve_data
def calculate_pr_curve(curve_data, targets):
    """Relabel generic per-fold curve data with precision-recall metric names.

    Args:
        curve_data (dict): dict of curves produced by
            BaseEvaluationPlotDataExtractor.calculate_curve_data.
            Every value must be a dict holding the generic keys
            "first_ret_value", "second_ret_value" and "area".
        targets (pd.Series): True labels.

    Returns:
        dict: The same `curve_data` object (it is modified in place), where in
            each curve the generic keys were renamed to "Precision", "Recall"
            and "AP" (PR metrics). The values are left untouched: a list the
            size of the number of folds with the evaluation of each fold.
            An additional top-level "prevalence" key holds the fraction of
            `targets` equal to `targets.max()` (the positive-label
            prevalence), to be used by the chance curve.

    Raises:
        KeyError: If a curve is missing one of the generic keys.
    """
    for curve in curve_data.values():
        # pop + assign renames the key without copying the per-fold values.
        curve["Precision"] = curve.pop("first_ret_value")
        curve["Recall"] = curve.pop("second_ret_value")
        curve["AP"] = curve.pop("area")

    # Added only after the loop: "prevalence" is a scalar, not a curve dict,
    # so it must not be visited by the renaming above.
    label_frequencies = targets.value_counts(normalize=True)
    curve_data["prevalence"] = label_frequencies.loc[targets.max()]
    return curve_data
def calculate_curve_data_binary_outcome(
    folds_predictions,
    targets,
    curve_metric,
    area_metric,
    stratify_by=None,
):
    """Calculate different performance (ROC or PR) curves.

    Args:
        folds_predictions (list[pd.Series]): Predictions for each fold.
        targets (pd.Series): True labels.
        curve_metric (callable): Performance metric returning 3 output vectors - metric1,
            metric2 and thresholds. Where metric1 and metric2 depict the curve
            when plotted on x-axis and y-axis.
        area_metric (callable): Performance metric of the area under the curve.
        stratify_by (pd.Series | None): Group assignment to stratify by.
            If None (the default), no stratification is done and a single
            curve named "Overall" is calculated over all samples of each fold.

    Returns:
        dict[str, dict[str, list[np.ndarray]]]: Evaluation of the metric
            for each fold and for each curve.
            One curve for each group level in `stratify_by`, named
            "Treatment=<level>" and ordered by sorted level.
            Each curve holds the generic keys "first_ret_value",
            "second_ret_value", "Thresholds" and "area".
            On general: {curve_name: {metric1: [evaluation_fold_1, ...]}}.
            For example, after renaming by `calculate_roc_curve`:
            {"Treatment=1": {"FPR": [FPR_fold_1, FPR_fold_2, FPR_fold_3]}}
    """

    def _evaluate_curve(curve_predictions, curve_targets):
        """Evaluate one curve on all folds and pack it under the generic keys."""
        (
            area_folds,
            first_ret_folds,
            second_ret_folds,
            threshold_folds,
        ) = calculate_performance_curve_data_on_folds(
            curve_predictions,
            curve_targets,
            # Same position in which calculate_curve_data_propensity passes
            # its per-fold sample weights; none are used here.
            None,
            area_metric,
            curve_metric,
        )
        return {
            "first_ret_value": first_ret_folds,
            "second_ret_value": second_ret_folds,
            "Thresholds": threshold_folds,
            "area": area_folds,
        }

    folds = list(folds_predictions)
    # Aligning targets to a fold does not depend on the stratum,
    # so it is done once per fold rather than once per (stratum, fold).
    folds_targets = [targets.loc[fold.index] for fold in folds]

    if stratify_by is None:
        # `set(None)` would raise a TypeError although None is the declared
        # default; treat it as "do not stratify".
        return {"Overall": _evaluate_curve(folds, folds_targets)}

    folds_strata = [stratify_by.loc[fold.index] for fold in folds]

    curve_data = {}
    for stratum_level in sorted(set(stratify_by)):
        # Slice data for that stratum level across the folds:
        stratum_predictions, stratum_targets = [], []
        for fold, fold_targets, fold_strata in zip(folds, folds_targets, folds_strata):
            in_stratum = fold_strata == stratum_level
            stratum_predictions.append(fold.loc[in_stratum])
            stratum_targets.append(fold_targets.loc[in_stratum])

        curve_data[f"Treatment={stratum_level}"] = _evaluate_curve(
            stratum_predictions, stratum_targets
        )
    return curve_data
def calculate_curve_data_propensity(
    fold_predictions: List[
        "causallib.evaluation.weight_predictor.PropensityPredictions"
    ],
    targets,
    curve_metric,
    area_metric,
):
    """Calculate different performance (ROC or PR) curves.

    Args:
        fold_predictions (list[PropensityPredictions]):
            Predictions for each fold. Each item must expose the attributes
            `propensity` and `weight_by_treatment_assignment`.
        targets (pd.Series): True labels.
        curve_metric (callable): Performance metric returning 3 output vectors - metric1,
            metric2 and thresholds. Where metric1 and metric2 depict the curve when plotted
            on x-axis and y-axis.
        area_metric (callable): Performance metric of the area under the curve.

    Returns:
        dict[str, dict[str, list[np.ndarray]]]: Evaluation of the metric
            for each fold and for each curve.
            3 curves (keys are presented as curve labels in the legend):
                * "Propensity" (regular, unweighted)
                * "Weighted" (weighted by `weight_by_treatment_assignment`)
                * "Expected" (duplicated population, weighted by propensity)
            Each curve holds the generic keys "first_ret_value",
            "second_ret_value", "Thresholds" and "area".
            On general: {curve_name: {metric1: [evaluation_fold_1, ...]}}.
            For example, after renaming by `calculate_roc_curve`:
            {"Weighted": {"FPR": [FPR_fold_1, FPR_fold_2, FPR_fold3]}}
    """
    positive_label, negative_label = targets.max(), targets.min()

    propensities = [fold.propensity for fold in fold_predictions]
    observed_targets = [
        targets.loc[fold.weight_by_treatment_assignment.index]
        for fold in fold_predictions
    ]

    # Expected curve duplicates the population, basically concatenating so that:
    # prediction = [p, p], target = [max label, min label], weights = [p, 1-p]
    expected_predictions = [pd.concat([p, p]) for p in propensities]
    expected_targets = [
        pd.concat([
            pd.Series(data=positive_label, index=p.index),
            pd.Series(data=negative_label, index=p.index),
        ])
        for p in propensities
    ]
    expected_weights = [pd.concat([p, 1 - p]) for p in propensities]

    # curve label -> (per-fold predictions, per-fold targets, per-fold weights).
    # Insertion order is the order of the curves in the returned dict.
    curves_inputs = {
        "Propensity": (
            propensities,
            observed_targets,
            [None for _ in fold_predictions],
        ),
        "Weighted": (
            propensities,
            observed_targets,
            [fold.weight_by_treatment_assignment for fold in fold_predictions],
        ),
        "Expected": (expected_predictions, expected_targets, expected_weights),
    }

    curve_data = {}
    for curve_name, curve_inputs in curves_inputs.items():
        folds_predictions, folds_targets, sample_weights = curve_inputs
        (
            area_folds,
            first_ret_folds,
            second_ret_folds,
            threshold_folds,
        ) = calculate_performance_curve_data_on_folds(
            folds_predictions,
            folds_targets,
            sample_weights,
            area_metric,
            curve_metric,
        )
        curve_data[curve_name] = {
            "first_ret_value": first_ret_folds,
            "second_ret_value": second_ret_folds,
            "Thresholds": threshold_folds,
            "area": area_folds,
        }
    return curve_data