Source code for causallib.evaluation.plots.plots

(C) Copyright 2019 IBM Corp.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Created on Aug 22, 2018

from itertools import cycle
from typing import Callable
import warnings

import matplotlib.colors
import matplotlib.pyplot as plt
import matplotlib.ticker
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import gaussian_kde
from sklearn import metrics

# TODO: propensity distribution using CDF (and not reflecting if so)
# TODO: consider making plots to not rely on pandas input (and can work more generally with numpy)?
# TODO: consider refactoring each type (family?) of plots to its own module (unify through __init__?)
# TODO: consider making plot module be class-based instead, taking its argument during init
#       and having a `plot()` interface

# Canonical plot names; `lookup_name` maps each one to its plotting function.
CONTINUOUS_ACCURACY_PLOT = "continuous_accuracy"
RESIDUALS_PLOT = "residuals"
COMMON_SUPPORT_PLOT = "common_support"
ROC_CURVE_PLOT = "roc_curve"
PR_CURVE_PLOT = "pr_curve"
CALIBRATION_PLOT = "calibration"
WEIGHT_DISTRIBUTION_PLOT = "weight_distribution"
COVARIATE_BALANCE_LOVE_PLOT = "covariate_balance_love"
COVARIATE_BALANCE_SLOPE_PLOT = "covariate_balance_slope"

# NOTE(review): the members of the next two sets were missing from the source
# view; they were restored to mirror the weight-specific and outcome-specific
# entries of the WEIGHT_PLOTS / OUTCOME_PLOTS registries at the end of this
# module - confirm against the upstream file.
WeightPlotNames = frozenset(
    {
        WEIGHT_DISTRIBUTION_PLOT,
        COVARIATE_BALANCE_LOVE_PLOT,
        COVARIATE_BALANCE_SLOPE_PLOT,
    }
)

ContinuousOutputPlotNames = frozenset(
    {
        CONTINUOUS_ACCURACY_PLOT,
        RESIDUALS_PLOT,
        COMMON_SUPPORT_PLOT,
    }
)

BinaryOutputPlotNames = frozenset({CALIBRATION_PLOT, ROC_CURVE_PLOT, PR_CURVE_PLOT})

# Propensity models can be evaluated both as binary classifiers and as weights.
PropensityPlotNames = frozenset(BinaryOutputPlotNames | WeightPlotNames)

def lookup_name(name: str) -> Callable:
    """Resolve a canonical plot name to the function that draws it.

    Canonical plot names are defined in this file as globals.

    Args:
        name (str): plot name to lookup

    Returns:
        Callable: plot function

    Raises:
        KeyError: if `name` is not one of the canonical plot names.
    """
    plot_functions = {
        CONTINUOUS_ACCURACY_PLOT: plot_continuous_prediction_accuracy_folds,
        RESIDUALS_PLOT: plot_residual_folds,
        COMMON_SUPPORT_PLOT: plot_counterfactual_common_support_folds,
        ROC_CURVE_PLOT: plot_roc_curve_folds,
        PR_CURVE_PLOT: plot_precision_recall_curve_folds,
        CALIBRATION_PLOT: plot_calibration_folds,
        WEIGHT_DISTRIBUTION_PLOT: plot_propensity_score_distribution_folds,
        COVARIATE_BALANCE_LOVE_PLOT: plot_mean_features_imbalance_love_folds,
        COVARIATE_BALANCE_SLOPE_PLOT: plot_mean_features_imbalance_slope_folds,
    }
    return plot_functions[name]
def _calculate_mutual_bins(x, y, bins="auto"): """ A common support for two vectors. Args: x (pd.Series): y (pd.Series): bins: compatible with numpy's bins parameter. Returns: np.array: bins cutoffs. """ data = np.append(x, y) bins = np.histogram(data, bins=bins)[1] return bins
def plot_counterfactual_common_support(prediction, a, ax=None):
    """Plot predicted Y^0 against predicted Y^1 for one set of predictions.

    Thin wrapper around `plot_counterfactual_common_support_folds` that treats
    all the samples as a single fold.

    Args:
        prediction (pd.DataFrame): Predicted potential outcomes; passed on as a
            one-element list of fold predictions.
        a (pd.Series): Treatment assignment, used to color the points.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes returned by the folds-based function.
    """
    everything_in_one_fold = [np.arange(a.shape[0])]
    return plot_counterfactual_common_support_folds(
        [prediction], hue_by=a, cv=everything_in_one_fold, ax=ax
    )
def plot_counterfactual_common_support_folds(
    predictions, hue_by, cv, alpha_by_density=True, ax=None
):
    """Scatter predicted Y^0 vs. predicted Y^1 over several folds, colored by treatment.

    Args:
        predictions (list[pd.DataFrame]): One entry per fold; column 0 is plotted
            as the predicted Y^0 and column 1 as the predicted Y^1.
        hue_by (pd.Series): Group assignment (as in treatment assignment) of the
            entire dataset (indices from `cv` will be used to slice this vector).
        cv (list[np.array]): One entry per fold, of row indices (as in iloc
            locations) - the indices of samples participating the fold.
        alpha_by_density (bool): Whether to calculate points alpha value
            (transparent-opaque) with density estimation. This can take some time
            to compute for large number of points. If False, alpha calculation
            will be a simple fast heuristic.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    # Mean individual effect (Y^1 - Y^0) of every fold, for the legend entry.
    per_fold_effects = []
    for fold_prediction in predictions:
        y0 = fold_prediction.iloc[:, 0]
        y1 = fold_prediction.iloc[:, 1]
        per_fold_effects.append((y1 - y0).mean())

    all_predictions = pd.concat(predictions)  # type: pd.DataFrame
    treatment = pd.concat([hue_by.iloc[fold_idx] for fold_idx in cv])  # type: pd.Series

    ax = _scatter_hue(
        all_predictions.iloc[:, 0],
        all_predictions.iloc[:, 1],
        treatment,
        alpha_by_density,
        ax=ax,
    )

    effect_label = rf"mean effect={np.mean(per_fold_effects):.2g}"
    if len(per_fold_effects) > 1:
        effect_label += rf"$\pm${np.std(per_fold_effects):.2g}"
    # An empty line in the background color: only its label shows in the legend.
    ax.plot([], [], color=ax.get_facecolor(), label=effect_label)

    _add_diagonal(ax)
    ax.legend(loc="best")
    ax.set_xlabel(r"Predicted $Y^0$")
    ax.set_ylabel(r"Predicted $Y^1$")
    ax.set_title("Predicted Common Support")
    return ax
def plot_continuous_prediction_accuracy(
    predictions, y, a, alpha_by_density=True, ax=None
):
    """Plot predicted vs. true outcome values for a single set of predictions.

    Thin wrapper around `plot_continuous_prediction_accuracy_folds` that treats
    all the samples as a single fold and never plots residuals.

    Args:
        predictions (pd.Series): Predicted outcome values.
        y (pd.Series): True outcome values.
        a (pd.Series): Treatment assignment, used to color the points.
        alpha_by_density (bool): Passed on to the folds-based function.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes returned by the folds-based function.
    """
    everything_in_one_fold = [np.arange(a.shape[0])]
    return plot_continuous_prediction_accuracy_folds(
        [predictions],
        y,
        a,
        everything_in_one_fold,
        alpha_by_density,
        ax=ax,
        plot_residuals=False,
    )
def plot_continuous_prediction_accuracy_folds(
    predictions, y, a, cv, alpha_by_density=True, plot_residuals=False, ax=None
):
    """Scatter predicted values against true values (or residuals) over folds.

    Args:
        predictions (list[pd.Series]): One entry per fold of predicted values.
        y (pd.Series): True outcome of the entire dataset; sliced by `cv`.
        a (pd.Series): Treatment assignment of the entire dataset; sliced by
            `cv` and used to color the points.
        cv (list[np.array]): One entry per fold, of row indices (iloc locations).
        alpha_by_density (bool): Passed on to `_scatter_hue`.
        plot_residuals (bool): If True the y-axis shows `y - prediction`
            instead of `y`, and the R^2 legend entry is omitted.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    # Concatenate data across folds:
    treatments, outcomes, predictions_on_actual, r2_scores = [], [], [], []
    for fold_prediction, fold_idx in zip(predictions, cv):
        fold_a = a.iloc[fold_idx]
        fold_y = y.iloc[fold_idx]
        if plot_residuals:
            fold_y = fold_y - fold_prediction
        # The score is collected on every call but only shown when not
        # plotting residuals.
        r2_scores.append(metrics.r2_score(fold_y, fold_prediction))
        treatments.append(fold_a)
        outcomes.append(fold_y)
        predictions_on_actual.append(fold_prediction)

    all_treatments = pd.concat(treatments)  # type: pd.Series
    all_outcomes = pd.concat(outcomes)  # type: pd.Series
    all_predictions = pd.concat(predictions_on_actual)  # type: pd.Series

    ax = _scatter_hue(all_predictions, all_outcomes, all_treatments, alpha_by_density, ax)

    if not plot_residuals:
        # R-squared label:
        r2_label = rf"$R^2={np.mean(r2_scores):.2f}"
        if len(r2_scores) > 1:
            r2_label += rf"\pm{np.std(r2_scores):.2f}$"
        else:
            r2_label += "$"
        # Invisible (background-colored) line, so only the label shows in the legend.
        ax.plot([], [], color=ax.get_facecolor(), label=r2_label)
        # NOTE(review): indentation was not recoverable from the source view; the
        # x=y diagonal is assumed to belong to the accuracy plot only - confirm.
        _add_diagonal(ax)

    ax.legend(loc="best")
    ax.set_xlabel("Predicted values")
    if plot_residuals:
        ax.set_ylabel("Prediction residuals")
        ax.set_title("Residual Plot")
    else:
        ax.set_ylabel("True values")
        ax.set_title("Continuous Accuracy Plot")
    return ax
def plot_residual_folds(predictions, y, a, cv, alpha_by_density=True, ax=None):
    """Plot prediction residuals against predicted values over several folds.

    Delegates to `plot_continuous_prediction_accuracy_folds` with
    `plot_residuals=True` and adds a dashed horizontal reference line at zero.

    Args:
        predictions (list[pd.Series]): One entry per fold of predicted values.
        y (pd.Series): True outcome of the entire dataset.
        a (pd.Series): Treatment assignment of the entire dataset.
        cv (list[np.array]): One entry per fold, of row indices (iloc locations).
        alpha_by_density (bool): Passed on to the accuracy plot.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    residual_ax = plot_continuous_prediction_accuracy_folds(
        predictions, y, a, cv, alpha_by_density, plot_residuals=True, ax=ax
    )
    # Zero-residual reference, kept behind the points.
    residual_ax.axhline(0.0, linestyle="--", color="grey", zorder=0, alpha=0.75)
    return residual_ax
def plot_residual(predictions, y, a, alpha_by_density=True, ax=None):
    """Plot prediction residuals for a single set of predictions.

    Thin wrapper around `plot_residual_folds` that treats all the samples as a
    single fold.

    Args:
        predictions (pd.Series): Predicted outcome values.
        y (pd.Series): True outcome values.
        a (pd.Series): Treatment assignment, used to color the points.
        alpha_by_density (bool): Passed on to the folds-based function.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes returned by the folds-based function.
    """
    everything_in_one_fold = [np.arange(a.shape[0])]
    return plot_residual_folds(
        [predictions], y, a, everything_in_one_fold, alpha_by_density, ax
    )
def _scatter_hue(x, y, hue, alpha_by_density=True, ax=None):
    """Scatter `x` against `y`, one color (and legend entry) per value of `hue`.

    Args:
        x (pd.Series): x-axis values.
        y (pd.Series): y-axis values.
        hue (pd.Series): group assignment for each point.
        alpha_by_density (bool): If True, every point gets its own alpha from
            `_get_alpha_per_point_with_density`. If False, each group gets one
            alpha that shrinks with the group size.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    ax = ax or plt.gca()
    points_rgba = (
        _get_alpha_per_point_with_density(X=[x, y], hue=hue)
        if alpha_by_density
        else None
    )
    for treatment_val in np.sort(np.unique(hue)):
        idx_mask = hue == treatment_val  # type: pd.Series
        if points_rgba is None:
            # Fast heuristic: larger groups are drawn more transparent.
            cur_color = None
            cur_alpha = np.clip(10 / np.sqrt(idx_mask.sum()), 0.01, 1)
        else:
            # Alpha is already encoded in the 4th channel of the per-point colors.
            cur_color = points_rgba.loc[idx_mask].values
            cur_alpha = None
        ax.scatter(
            x=x.loc[idx_mask],
            y=y.loc[idx_mask],
            alpha=cur_alpha,
            facecolor=cur_color,
            edgecolors="none",
            label=f"treatment={treatment_val}",
        )
    return ax


def _get_alpha_per_point_with_density(X, hue, min_alpha_bound=0.3, max_alpha_bound=1.0):
    """Build per-point RGBA colors whose alpha decreases with local point density.

    Matplotlib does not support pointwise alpha values (rather, constant value
    for an entire plt.plot()). This function will utilize a supported pointwise
    color-scheme, using rgba, and passing the individual alpha values as the 4th
    dimension ('a') of the rgba.

    Args:
        X: in a form compatible with statsmodels' KDEMultivariate
            (list of pd.Series, or pd.DataFrame)
        hue (pd.Series): A vector with group assignment for each point in x.
        min_alpha_bound (float | None): Value between 0 and 1, used to linearly
            rescale the alpha values. If None, rescale is avoided. Default of
            0.3, since lower values are usually too unobservable.
        max_alpha_bound (float | None): Value between 0 and 1, used to linearly
            rescale the alpha values. If None, rescale is avoided.

    Returns:
        pd.DataFrame: indexed like `hue`, with float columns "r", "g", "b", "a".
    """
    points_rgba = pd.DataFrame(index=hue.index, columns=list("rgba"), dtype=np.float64)

    # Calculate alpha for each point based on its density:
    kde = sm.nonparametric.KDEMultivariate(data=X, var_type="cc", bw="normal_reference")
    points_density = kde.pdf(X)
    # Invert values - the denser the point -> the lower its alpha (more transparent)
    points_alpha = 1 / points_density

    if (min_alpha_bound is not None) and (max_alpha_bound is not None):
        lowest_alpha = points_alpha.min()
        alpha_span = points_alpha.max() - lowest_alpha
        if np.isfinite(alpha_span) and alpha_span > 0:
            # Rescale alphas (linearly) to [min_alpha_bound, max_alpha_bound]:
            points_alpha = min_alpha_bound + (max_alpha_bound - min_alpha_bound) * (
                (points_alpha - lowest_alpha) / alpha_span
            )
        else:
            # All densities are equal (or one is zero): rescaling would divide
            # by zero or by infinity and yield NaN alphas, so draw every point
            # at the upper bound instead.
            points_alpha = np.full_like(points_alpha, max_alpha_bound, dtype=float)
    points_rgba["a"] = points_alpha  # Assign the alpha values

    for i, hue_val in enumerate(np.sort(np.unique(hue))):
        idx_mask = hue == hue_val
        # "C{i}" walks through matplotlib's default color cycle.
        cur_color = matplotlib.colors.to_rgb(f"C{i}")
        # Assign that constant RGB value for all current points
        points_rgba.loc[idx_mask, ["r", "g", "b"]] = cur_color
    return points_rgba
def plot_calibration_folds(
    predictions,
    targets,
    cv,
    n_bins=10,
    plot_se=True,
    plot_rug=False,
    plot_histogram=False,
    quantile=False,
    ax=None,
):
    """Plot calibration curves for multiple models (presumably in folds)

    Args:
        predictions (list[pd.Series]): list (each entry of a fold) of arrays -
            probability ("scores") predictions.
        targets (pd.Series): true labels to calibrate against on the overall
            data (not divided to folds).
        cv (list[np.array]): One entry per fold, of row indices (iloc
            locations) used to slice `targets`.
        n_bins (int): number of bins to evaluate in the plot
        plot_se (bool): Whether to plot standard errors around the mean
            bin-probability estimation.
        plot_rug (bool): Whether to plot a rug of the predictions.
        plot_histogram (bool): Whether to plot a histogram of the predictions
            in the background.
        quantile (bool): If true, the binning of the calibration curve is by
            quantiles. Default is false
        ax (plt.Axes): Optional

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    # Resolve the axes up front so an empty `cv` still yields a valid plot
    # (diagonal, legend and title) instead of failing on `None`.
    ax = ax or plt.gca()
    for fold_number, (predictions_fold, idx_fold) in enumerate(zip(predictions, cv)):
        ax = _plot_calibration_single(
            y_true=targets.iloc[idx_fold],
            y_prob=predictions_fold,
            n_bins=n_bins,
            plot_diagonal=False,  # drawn once below, after all folds
            plot_se=plot_se,
            plot_rug=plot_rug,
            plot_histogram=plot_histogram,
            quantile=quantile,
            label=f"fold {fold_number}",
            ax=ax,
        )
    _add_diagonal(ax)
    ax.legend(loc="best")
    ax.set_title("Calibration")
    return ax
def plot_calibration(
    predictions,
    targets,
    n_bins=10,
    plot_se=True,
    plot_rug=False,
    plot_histogram=True,
    quantile=False,
    ax=None,
):
    """Plot the calibration curve of a single set of predictions.

    Thin wrapper around `plot_calibration_folds` that treats all the samples
    as a single fold.

    Args:
        predictions (pd.Series): probability ("scores") predictions.
        targets (pd.Series): true labels to calibrate against.
        n_bins (int): number of bins to evaluate in the plot
        plot_se (bool): Whether to plot standard errors around the mean
            bin-probability estimation.
        plot_rug (bool): Passed on to the folds-based function.
        plot_histogram (bool): Passed on to the folds-based function.
        quantile (bool): If true, the binning is by quantiles.
        ax (plt.Axes): Optional

    Returns:
        plt.Axes: the axes returned by the folds-based function.
    """
    everything_in_one_fold = [np.arange(predictions.shape[0])]
    calibration_ax = plot_calibration_folds(
        [predictions],
        targets,
        cv=everything_in_one_fold,
        n_bins=n_bins,
        plot_se=plot_se,
        plot_rug=plot_rug,
        plot_histogram=plot_histogram,
        quantile=quantile,
        ax=ax,
    )
    return calibration_ax
def _plot_calibration_single(
    y_true,
    y_prob,
    n_bins=10,
    plot_diagonal=True,
    plot_se=True,
    plot_rug=False,
    plot_histogram=False,
    quantile=False,
    label=None,
    ax=None,
):
    """Plot a calibration curve showing how well y_prob predicts the probability
    of a binary outcome y

    The standard error of a binomial proportion, sqrt(p(1-p)/n), is used to
    calculate the values for which p would be one standard error away.
    This means we are looking for
        r +/- sqrt(r(1-r)/n) = p
    Squaring gives a quadratic equation for r whose solution is
        r = (2np+1 +/- sqrt(4np(1-p)+1)) / (2n+2)

    Args:
        y_true (pd.Series): true binary labels.
        y_prob (pd.Series): predicted probabilities.
        n_bins (int): the number of bins to use for the calibration plot
        plot_diagonal (bool): Whether to plot a diagonal line or not.
        plot_se (bool): Whether to plot standard errors around the mean
            bin-probability estimation.
        plot_rug (bool): Whether to plot rug of the prediction
        plot_histogram (bool): Whether to plot histogram at the background.
        quantile (bool): If False specifies equal sized bins, if True splits
            the probabilities into n_bins quantiles.
        label (str): The label for the plotted line
        ax (plt.Axes): Optional

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    ax = ax or plt.gca()

    if quantile:
        bins = np.unique(
            np.percentile(y_prob, np.linspace(0, 100, n_bins + 1).astype(int))
        )
        # in case all values of y_prob are the same
        bins = bins if len(bins) > 1 else np.concatenate([bins, bins])
        bins[-1] += 1e-8
        prob_true, prob_pred, counts = calibration_curve(y_true, y_prob, bins=bins)
    else:
        prob_true, prob_pred, counts = calibration_curve(y_true, y_prob, bins=n_bins)
        bins = np.linspace(0.0, 1.0 + 1e-8, n_bins + 1)

    if plot_rug:
        ax.plot(y_prob, np.full_like(y_prob, 0.01), "|", color="black", alpha=0.7)

    line_color = None
    if plot_histogram:
        # `counts` only has entries for non-empty bins: expand it to one value
        # per bin, then add a trailing 0 so there is one height per bin *edge*
        # (with "steps-post", height i is drawn from edge i to edge i+1).
        n_intervals = len(bins) - 1
        bin_fraction = counts.reindex(np.arange(n_intervals), fill_value=0) / counts.sum()
        bin_fraction = np.append(bin_fraction.to_numpy(dtype=float), 0.0)
        hist_line = ax.plot(bins, bin_fraction, drawstyle="steps-post", alpha=0.8)[0]
        # keep histogram behind any new lines that are plotted after it.
        hist_line.set_zorder(2)
        # if plotting hist, keep track of color to use in the line to be plotted
        line_color = hist_line.get_color()

    if plot_diagonal:
        _add_diagonal(ax)

    lines = ax.plot(prob_pred, prob_true, "s-", color=line_color, label=label)

    # Plot standard error:
    if plot_se:
        # Put the counts in the same bin order as the curve being shaded.
        n_in_bin = counts.reindex(prob_true.index)
        disc = (4 * n_in_bin * prob_true) * (1 - prob_true) + 1
        upper = (2 * n_in_bin * prob_true + 1 + np.sqrt(disc)) / (2 * n_in_bin + 2)
        lower = (2 * n_in_bin * prob_true + 1 - np.sqrt(disc)) / (2 * n_in_bin + 2)
        ax.fill_between(
            x=prob_pred, y1=lower, y2=upper, color=lines[-1].get_color(), alpha=0.5
        )

    ax.set_xlabel("Predicted probability")
    ax.set_ylabel("Observed probability")
    return ax
def calibration_curve(y_true, y_prob, bins=5):
    """
    Compute calibration curve of a classifier given its scores output and true
    label assignment.

    Bins are closed on the right, and the first bin also includes its left
    edge, so every value between the lowest and the highest edge (inclusive)
    is assigned to a bin.

    Args:
        y_true (pd.Series): True binary label assignment.
        y_prob (pd.Series): Predicted probability of each sample being the
            positive label.
        bins (int | list | np.ndarray | pd.Series): If int, it defines the
            number of equal-width bins in the given range (5, by default).
            If bins a sequence, it defines the bin edges, including the
            rightmost edge, allowing for non-uniform bin widths.

    Returns:
        (pd.Series, pd.Series, pd.Series): empirical_prob, predicted_prob, bin_counts
            empirical_prob: The fraction of positive labels in each bins
            predicted_prob: The average of predicted probability in each bin
            bin_counts: The number of samples fallen in each bin
            All three are indexed by the (0-based) bin number, in increasing
            order, and only contain bins that have at least one sample.

    Raises:
        ValueError: if `y_prob` has values outside the range of the bins.
        TypeError: if `bins` is neither an integer nor a sequence of scalars.

    References:
        [1] Zadrozny, B., & Elkan, C. (2002, July).
            Transforming classifier scores into accurate multiclass probability
            estimates
    """
    # Get binning out of provided bins
    if isinstance(bins, (int, np.integer)) and not isinstance(bins, bool):
        # The top edge is nudged above 1.0, matching the binning the callers
        # in this module build for themselves.
        bins = np.linspace(0.0, 1.0 + 1e-8, bins + 1)
    elif hasattr(bins, "__len__") and not isinstance(bins, str):
        # Some sort of vector
        bins = np.sort(np.ravel(bins))
    else:
        raise TypeError("bins must either be an integer or a sequence of scalars")

    if y_prob.max() > bins.max() or y_prob.min() < bins.min():
        raise ValueError("y_prob has values outside the provided bins")

    # `include_lowest` keeps samples equal to the lowest edge (e.g. a predicted
    # probability of exactly 0) inside the first bin; without it they are left
    # unassigned and the integer conversion below fails.
    bin_of_samples = pd.cut(
        y_prob, bins, labels=np.arange(len(bins) - 1), include_lowest=True
    ).astype(int)

    predicted_prob = y_prob.groupby(bin_of_samples).mean()
    empirical_prob = y_true.groupby(bin_of_samples).mean()
    # Sort by bin number so all three outputs line up position-wise.
    bin_counts = bin_of_samples.value_counts(sort=False).sort_index()
    return empirical_prob, predicted_prob, bin_counts
def plot_roc_curve_folds(
    curve_data, ax=None, plot_folds=False, label_folds=False, label_std=False, **kwargs
):
    """Plot ROC curves (one per entry of `curve_data`) averaged over folds.

    Args:
        curve_data (dict): Maps a curve name to a mapping holding the per-fold
            "FPR", "TPR" and "AUC" values of that curve.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.
        plot_folds (bool): Whether to also draw the individual fold curves.
        label_folds (bool): Whether to give the fold curves legend entries.
        label_std (bool): Whether to give the uncertainty band a legend entry.
        **kwargs: Accepted but not used by this function.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    num_of_curves = len(curve_data.keys())
    several_curves = num_of_curves != 1
    for curve_number, (curve_name, single_curve) in enumerate(curve_data.items()):
        ax = _plot_single_performance_curve(
            single_curve["FPR"],
            single_curve["TPR"],
            single_curve["AUC"],
            "AUC",
            f"C{curve_number}",
            curve_name,
            label_std,
            label_folds,
            plot_folds,
            several_curves,
            ax,
        )

    # Plot chance curve:
    ax.plot(
        [0, 1], [0, 1], linestyle="--", lw=2, color="black", label="Chance", alpha=0.8
    )

    ax.set_xlim(left=-0.05, right=1.05)
    ax.set_ylim(bottom=-0.05, top=1.05)
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC Curve")
    ax.legend(loc="lower right")
    return ax
def plot_precision_recall_curve_folds(
    curve_data, ax=None, plot_folds=False, label_folds=False, label_std=False, **kwargs
):
    """Plot precision-recall curves (one per curve in `curve_data`) averaged over folds.

    Args:
        curve_data (dict): Maps a curve name to a mapping holding the per-fold
            "Recall", "Precision" and "AP" values of that curve. An optional
            "prevalence" entry holds the positive-class prevalence, drawn as
            the horizontal "Chance" line. The dictionary is not modified.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.
        plot_folds (bool): Whether to also draw the individual fold curves.
        label_folds (bool): Whether to give the fold curves legend entries.
        label_std (bool): Whether to give the uncertainty band a legend entry.
        **kwargs: Accepted but not used by this function.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    # TODO: Check why it does not end at class prevalence (for recall=1.0)
    # Read "prevalence" without popping it: the caller's dictionary stays
    # intact, so plotting the same data twice gives the same figure.
    pos_class_prevalence = curve_data.get("prevalence", None)
    curves = {
        curve_name: single_curve
        for curve_name, single_curve in curve_data.items()
        if curve_name != "prevalence"
    }
    # Count actual curves only, as the ROC plot does ("prevalence" is not a curve).
    num_of_curves = len(curves)

    for curve_number, (curve_name, single_curve) in enumerate(curves.items()):
        ax = _plot_single_performance_curve(
            single_curve["Recall"],
            single_curve["Precision"],
            single_curve["AP"],
            "AP",
            f"C{curve_number}",
            curve_name,
            label_std,
            label_folds,
            plot_folds,
            num_of_curves != 1,
            ax,
        )

    # Plot chance curve:
    if pos_class_prevalence is not None:
        ax.plot(
            [0, 1],
            [pos_class_prevalence, pos_class_prevalence],
            linestyle="--",
            lw=2,
            color="black",
            label="Chance",
            alpha=0.8,
        )

    ax.set_xlim(left=-0.05, right=1.05)
    ax.set_ylim(bottom=-0.05, top=1.05)
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    ax.set_title("PR Curve")
    ax.legend(loc="lower left")
    return ax
def _plot_single_performance_curve(
    xs,
    ys,
    areas,
    areas_type,
    color="C0",
    curve_name="",
    label_std=False,
    label_folds=False,
    plot_folds=False,
    colored_folds=False,
    ax=None,
):
    """Draw one fold-averaged performance curve with a +/- 1 std. dev. band.

    Every fold curve is interpolated onto 100 evenly spaced points in [0, 1];
    the mean over the folds is drawn as the main curve.

    Args:
        xs (list): per-fold x-values of the curve.
        ys (list): per-fold y-values of the curve.
        areas (list): per-fold area under the curve.
        areas_type (str): name of the area measure, used in the labels. "AP"
            additionally makes the fold curves be reversed before interpolation.
        color (str): color of the main curve and its band.
        curve_name (str): name shown in the main curve's legend entry.
        label_std (bool): Whether to give the band a legend entry.
        label_folds (bool): Whether to give the fold curves legend entries.
        plot_folds (bool): Whether to draw the individual fold curves.
        colored_folds (bool): If True the fold curves are drawn with
            `color=None`, otherwise with `color`.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    ax = ax or plt.gca()
    assert len(xs) == len(ys) == len(areas)
    n_folds = len(xs)
    x_domain = np.linspace(0, 1, 100)
    reversed_curves = areas_type == "AP"

    ys_interp = []
    for i in range(n_folds):
        if reversed_curves:
            # precision/recall need to be reversed for interpolation
            fold_interp = np.interp(x_domain, xs[i][::-1], ys[i][::-1])
        else:
            fold_interp = np.interp(x_domain, xs[i], ys[i])
            # NOTE(review): indentation was not recoverable from the source
            # view; pinning the first point to 0 is assumed to apply to
            # non-"AP" curves only - confirm.
            fold_interp[0] = 0.0
        ys_interp.append(fold_interp)

        area = areas[i]
        folds_label = f"Fold {i} ({areas_type} = {area:.2f})" if label_folds else None
        if plot_folds:
            folds_color = None if colored_folds else color
            ax.plot(xs[i], ys[i], lw=1, alpha=0.3, color=folds_color, label=folds_label)

    # Plot main (folds average) curve
    mean_ys = np.nanmean(ys_interp, axis=0)
    mean_area = np.nanmean(areas)
    std_area = np.nanstd(areas)
    main_label = rf"{curve_name} ({areas_type} = {mean_area:.2f} $\pm$ {std_area:.2f})"
    ax.plot(x_domain, mean_ys, color=color, label=main_label, lw=2, alpha=0.9)

    # Plot uncertainty around main curve, clipped to [0, 1]:
    ys_std = np.std(ys_interp, axis=0)
    upper_ys = np.minimum(mean_ys + ys_std, 1)
    lower_ys = np.maximum(mean_ys - ys_std, 0)
    std_label = r"$\pm$ 1 std. dev." if label_std else None
    ax.fill_between(
        x_domain, lower_ys, upper_ys, color=color, alpha=0.2, label=std_label
    )
    return ax
def plot_propensity_score_distribution(
    propensity,
    treatment,
    reflect=True,
    kde=False,
    cumulative=False,
    norm_hist=True,
    ax=None,
):
    """
    Plot the distribution of propensity score

    Args:
        propensity (pd.Series): values whose distribution is plotted.
        treatment (pd.Series): group assignment; one histogram per unique value.
        reflect (bool): Whether to plot second treatment group on the opposite
            sides of the x-axis. This can only work if there are exactly two
            groups.
        kde (bool): Whether to plot kernel density estimation
        cumulative (bool): Whether to plot cumulative distribution.
        norm_hist (bool): If False - use raw counts on the y-axis.
            If kde=True, then norm_hist should be True as well.
        ax (plt.Axes | None): Optional

    Returns:
        plt.Axes: the axes that were drawn on.

    Raises:
        ValueError: if `reflect` is True and there are not exactly two groups.
    """
    ax = ax or plt.gca()
    if kde and not norm_hist:
        warnings.warn(
            "kde=True and norm_hist=False is not supported. Forcing norm_hist from False to True."
        )
        norm_hist = True

    # All groups share the bin edges computed on the pooled values.
    shared_bins = np.histogram(propensity, bins="auto")[1]
    hist_kwargs = dict(
        bins=shared_bins, density=norm_hist, alpha=0.5, cumulative=cumulative
    )

    unique_treatments = np.sort(np.unique(treatment))
    for treatment_number, treatment_value in enumerate(unique_treatments):
        cur_propensity = propensity.loc[treatment == treatment_value]
        cur_color = f"C{treatment_number}"
        ax.hist(
            cur_propensity,
            label=f"treatment = {treatment_value}",
            color=[cur_color],
            **hist_kwargs,
        )
        if not kde:
            continue

        cur_kde = gaussian_kde(cur_propensity)
        min_support = max(0, cur_propensity.values.min() - cur_kde.factor)
        max_support = min(1, cur_propensity.values.max() + cur_kde.factor)
        X_plot = np.linspace(min_support, max_support, 200)
        if cumulative:
            kde_curve = np.array(
                [cur_kde.integrate_box_1d(X_plot[0], x_i) for x_i in X_plot]
            )
        else:
            kde_curve = cur_kde.pdf(X_plot)
        ax.plot(X_plot, kde_curve, color=cur_color)

    if reflect:
        n_groups = len(unique_treatments)
        if n_groups != 2:
            raise ValueError(
                "Reflecting density across X axis can only be done for two groups. "
                f"This one has {n_groups}"
            )

        # Update line:
        if kde:
            last_line = ax.get_lines()[-1]
            last_line.set_ydata(-1 * last_line.get_ydata())

        # Update histogram bars: flip everything from the first bar of the
        # last group onwards.
        patch_labels = [patch.get_label() for patch in ax.patches]
        idx_of_first_hist_rect = patch_labels.index(
            f"treatment = {unique_treatments[-1]}"
        )
        for patch in ax.patches[idx_of_first_hist_rect:]:
            patch.set_height(-1 * patch.get_height())

        # Re-set the view of axes:
        ax.relim()
        ax.autoscale()

        # Remove negation sign from lower y-axis:
        def _unsigned_tick(x, pos):
            return str(x) if x >= 0 else str(-x)

        ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(_unsigned_tick))

    ax.legend(loc="best")
    if propensity.between(0, 1, inclusive="both").all():
        x_type = "Propensity"
    else:
        x_type = "Weights"
    ax.set_xlabel(x_type)
    ax.set_ylabel("Probability density" if norm_hist else "Counts")
    ax.set_title(f"{x_type} Distribution")
    return ax
def plot_propensity_score_distribution_folds(
    predictions,
    hue_by,
    cv,
    reflect=True,
    kde=False,
    cumulative=False,
    norm_hist=True,
    ax=None,
):
    """Plot the propensity distribution of several folds pooled together.

    Args:
        predictions (list[pd.Series]): One entry per fold of predicted values.
        hue_by (pd.Series): Group assignment of the entire dataset; sliced by
            `cv` to match the pooled predictions.
        cv (list[np.array]): One entry per fold, of row indices (iloc locations).
        reflect (bool): Whether to plot second treatment group on the opposite
            sides of the x-axis. This can only work if there are exactly two
            groups.
        kde (bool): Whether to plot kernel density estimation
        cumulative (bool): Whether to plot cumulative distribution.
        norm_hist (bool): If False - use raw counts on the y-axis.
            If kde=True, then norm_hist should be True as well.
        ax (plt.Axes): Optional

    Returns:
        plt.Axes: the axes returned by `plot_propensity_score_distribution`.
    """
    pooled_propensity = pd.concat(predictions)  # type: pd.Series
    # Slice per fold (rather than using `hue_by` as is) so records that appear
    # in more than one fold are repeated, just like their predictions.
    treatment_per_fold = [hue_by.iloc[fold_idx] for fold_idx in cv]
    pooled_treatment = pd.concat(treatment_per_fold)  # type: pd.Series
    return plot_propensity_score_distribution(
        pooled_propensity,
        pooled_treatment,
        reflect=reflect,
        kde=kde,
        cumulative=cumulative,
        norm_hist=norm_hist,
        ax=ax,
    )
def plot_mean_features_imbalance_love_folds(
    table1_folds,
    cv=None,
    aggregate_folds=True,
    thresh=None,
    plot_semi_grid=True,
    ax=None,
):
    """Love plot: per-covariate imbalance, one marker per column of the tables.

    Args:
        table1_folds (list[pd.DataFrame]): One table per fold, indexed by
            covariate. Must have an "unweighted" column (used for ordering);
            every column is drawn as its own series of markers.
        cv: Not used by this function.
        aggregate_folds (bool): If True plot the mean over the folds, otherwise
            plot every fold separately.
        thresh (float | None): If given, draw a vertical threshold line (and
            its negative when the tables contain negative values).
        plot_semi_grid (bool): Whether to draw a dashed horizontal line for each
            covariate, spanning its smallest to its largest value.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    method_pretty_name = {
        "smd": "Standard Mean Difference",
        "abs_smd": "Absolute Standard Mean Difference",
        "ks": "Kolmogorov-Smirnov",
    }
    ax = ax or plt.gca()

    # NOTE(review): assumes every table carries the name of its imbalance
    # metric (e.g. "smd") as `columns.name` - confirm against the code that
    # builds these tables. A table itself cannot serve as the lookup key
    # (DataFrames are unhashable).
    metric_name = table1_folds[0].columns.name

    # Aggregate across folds. This will be used to determine order, and extreme values.
    # Use this groupby trick:
    aggregated_table1 = pd.concat(table1_folds)  # type: pd.DataFrame
    aggregated_table1 = aggregated_table1.groupby(aggregated_table1.index)
    mean_table1 = aggregated_table1.mean()
    order = mean_table1.sort_values(by="unweighted", ascending=True).index

    if aggregate_folds:
        # place in iterable to make compatible with input
        table1_folds = [mean_table1]

    # Plot:
    for table1 in table1_folds:
        # Restart the cycles for every fold so a column keeps its color/marker.
        color_cycle = cycle(plt.rcParams["axes.prop_cycle"].by_key()["color"])
        marker_cycle = cycle(["o", "^", "P", "s", "*"])
        for col_name, col_data in table1.items():
            col_data = col_data.loc[order]  # unified order
            ax.scatter(
                col_data,
                order,
                label=col_name,
                marker=next(marker_cycle),
                color=next(color_cycle),
                zorder=0,
            )

    # Plot line connecting the dots:
    if plot_semi_grid:
        if aggregate_folds:
            h_max = mean_table1.max(axis="columns")
            h_min = mean_table1.min(axis="columns")
        else:
            h_max = aggregated_table1.max().max(axis="columns")
            h_min = aggregated_table1.min().min(axis="columns")
        ax.hlines(
            h_min.index,
            xmin=h_min,
            xmax=h_max,
            colors="grey",
            linestyles="dashed",
            zorder=1,
            label=None,
        )

    # Plot vertical threshold line
    if thresh is not None:
        ax.axvline(thresh, color="grey", linestyle="--", zorder=2)
        if aggregated_table1.min().min().min() < 0:
            # There are negative values, plot the minus of threshold and adjust
            # x-limits to be symmetric:
            ax.axvline(-thresh, color="grey", linestyle="--", zorder=2)
            widest = np.max(np.abs(ax.get_xlim()))
            ax.set_xlim(-widest, widest)

    # If too many features, remove their tick labels:
    fig = ax.get_figure()
    ax_pixel_height = (
        ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()).height
        * fig.dpi
    )
    # 10 is hypothesized to be font size + 3 pt. margin
    if ax_pixel_height / order.size < 10 + 3:
        ax.set_yticklabels([])  # Too many y-ticks for axis size, remove them.

    if metric_name is not None:
        ax.set_xlabel(method_pretty_name.get(metric_name, metric_name))
    ax.set_ylabel("Covariates")
    ax.legend(loc="lower right")
    return ax
def plot_mean_features_imbalance_scatter_plot(
    table1_folds,
    aggregate_folds=True,
    thresh=None,
    label_imbalanced=True,
    ax=None,
):
    """Scatter the weighted imbalance of every covariate against its unweighted one.

    Args:
        table1_folds (list[pd.DataFrame]): One table per fold, indexed by
            covariate, with "unweighted" and "weighted" columns.
        aggregate_folds (bool): If True plot the mean over the folds, otherwise
            plot every fold separately (each with its own marker shape).
        thresh (float | None): Covariates whose weighted value exceeds it are
            drawn in a second color; threshold lines are drawn as well. If
            None, no covariate is considered imbalanced.
        label_imbalanced (bool): Whether to write the name of each covariate
            that exceeds the threshold next to its point.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    # get current axes
    ax = ax or plt.gca()
    method_pretty_name = {
        "smd": "Standard Mean Difference",
        "abs_smd": "Absolute Standard Mean Difference",
        "ks": "Kolmogorov-Smirnov",
    }

    # NOTE(review): assumes every table carries the name of its imbalance
    # metric (e.g. "smd") as `columns.name` - confirm against the code that
    # builds these tables. A table itself cannot serve as the lookup key
    # (DataFrames are unhashable).
    metric_name = table1_folds[0].columns.name

    # Aggregate across folds. This will be used to determine extreme values.
    # Use this groupby trick:
    aggregated_table1 = pd.concat(table1_folds)  # type: pd.DataFrame
    aggregated_table1 = aggregated_table1.groupby(aggregated_table1.index)
    if aggregate_folds:
        table1_folds = [aggregated_table1.mean()]

    # One cycle for the whole plot, so that every fold gets a different marker
    # shape when the folds are not aggregated.
    marker_cycle = cycle(["o", "^", "P", "s", "*"])

    # Plot:
    for table1 in table1_folds:
        # find the features that are above threshold
        if thresh is None:
            violating = pd.Series(False, index=table1.index)
        else:
            violating = table1["weighted"] > thresh
        # determine color for each dot on plot
        color = ["C1" if is_violating else "C0" for is_violating in violating]
        ax.scatter(
            x=table1["unweighted"],
            y=table1["weighted"],
            marker=next(marker_cycle),
            color=color,
        )
        if label_imbalanced:
            for covariate_name, covariate_diff in table1.loc[violating].iterrows():
                ax.text(
                    x=covariate_diff["unweighted"],
                    y=covariate_diff["weighted"],
                    s=covariate_name,
                    horizontalalignment="left",
                )

    # Plot vertical and horizontal threshold line
    if thresh is not None:
        ax.axvline(thresh, color="grey", linestyle="--", zorder=2)
        ax.axhline(thresh, color="grey", linestyle="--", zorder=2)
        if aggregated_table1.min().min().min() < 0:
            # There are negative values, plot the minus of threshold and adjust
            # x-limits to be symmetric:
            ax.axvline(-thresh, color="grey", linestyle="--", zorder=2)
            ax.axhline(-thresh, color="grey", linestyle="--", zorder=2)
            widest = np.max(np.abs(ax.get_xlim()))
            ax.set_xlim(-widest, widest)

    # adding labels
    if metric_name is None:
        ax.set_xlabel("Unweighted")
        ax.set_ylabel("Weighted")
    else:
        metric_name = method_pretty_name.get(metric_name, metric_name)
        ax.set_xlabel(f'Unweighted [{metric_name}]')
        ax.set_ylabel(f'Weighted [{metric_name}]')
    return ax
def plot_mean_features_imbalance_slope_folds(
    table1_folds, cv=None, thresh=None, label_imbalanced=True, ax=None
):
    """Slope graph of every covariate's imbalance before and after weighting.

    The tables of all folds are averaged per covariate and handed to
    `slope_graph`.

    Args:
        table1_folds (list[pd.DataFrame]): One table per fold, indexed by
            covariate, with "unweighted" and "weighted" columns.
        cv: Not used by this function.
        thresh (float | None): Passed on to `slope_graph`.
        label_imbalanced (bool): Passed on to `slope_graph`.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    method_pretty_name = {
        "smd": "Standard Mean Difference",
        "abs_smd": "Absolute Standard Mean Difference",
        "ks": "Kolmogorov-Smirnov",
    }

    # NOTE(review): assumes every table carries the name of its imbalance
    # metric (e.g. "smd") as `columns.name` - confirm against the code that
    # builds these tables. A table itself cannot serve as the lookup key
    # (DataFrames are unhashable).
    metric_name = table1_folds[0].columns.name

    # Aggregate across folds (mean per covariate), using this groupby trick:
    aggregated_table1 = pd.concat(table1_folds)  # type: pd.DataFrame
    aggregated_table1 = aggregated_table1.groupby(aggregated_table1.index).mean()
    # Reorder:
    aggregated_table1 = aggregated_table1.sort_values(by="unweighted", ascending=True)

    # Slope graph:
    ax = slope_graph(
        left=aggregated_table1["unweighted"],
        right=aggregated_table1["weighted"],
        thresh=thresh,
        label_imbalanced=label_imbalanced,
        ax=ax,
    )
    if metric_name is not None:
        ax.set_ylabel(method_pretty_name.get(metric_name, metric_name))
    return ax
def slope_graph(
    left,
    right,
    thresh=None,
    label_imbalanced=True,
    color_below="C0",
    color_above="C1",
    marker="o",
    ax=None,
):
    """Draw a slope graph: one line per item, from its `left` to its `right` value.

    Args:
        left (pd.Series): values plotted at the left x-tick. Its `name` labels
            the tick ("unweighted" if it has none).
        right (pd.Series): values plotted at the right x-tick, looked up by the
            index labels of `left`. Its `name` labels the tick ("weighted" if
            it has none).
        thresh (float | None): If given, a horizontal threshold line is drawn
            (and its negative when there are negative values), and items whose
            right value exceeds it use `color_above`.
        label_imbalanced (bool): Whether to write the index label of each item
            whose right value exceeds the threshold next to its right end.
        color_below (str): color of items not exceeding the threshold.
        color_above (str): color of items exceeding the threshold.
        marker (str): marker drawn at both ends of every line.
        ax (plt.Axes): The axes on which the plot will be displayed. Optional.

    Returns:
        plt.Axes: the axes that were drawn on.
    """
    ax = ax or plt.gca()
    left_xtick = left.name or "unweighted"
    right_xtick = right.name or "weighted"

    if thresh is not None:
        ax.axhline(thresh, color="grey", linestyle="--", zorder=2)
        # There are negative values, plot the minus of threshold
        if left.min() < 0 or right.min() < 0:
            ax.axhline(-thresh, color="grey", linestyle="--", zorder=2)
    else:
        # Comparisons against NaN are always False, so with no threshold every
        # line uses `color_below` and nothing gets labeled.
        thresh = np.nan

    for idx in left.index:
        cur_left = left.loc[idx]
        cur_right = right.loc[idx]
        is_above = cur_right > thresh
        ax.plot(
            [left_xtick, right_xtick],
            [cur_left, cur_right],
            label=None,
            color=color_above if is_above else color_below,
            marker=marker,
        )
        if label_imbalanced and is_above:
            ax.text(x=1.01, y=cur_right, s=idx, horizontalalignment="left")

    # Place y-tick labels on both sides:
    ax.tick_params(left=True, labelleft=True, right=True, labelright=True)
    return ax
def get_subplots(n_features, max_cols=5, fig_size=(16, 16), sharex=False, sharey=False):
    """Initializes the grid of subplots and returns the axes

    Args:
        n_features (int): The total number of features to plot
        max_cols (int): The maximal number of figures in each row of figures
        fig_size (tuple[int, int]): Passed on to matplotlib
        sharex (str|bool): will be passed to subplots
        sharey (str|bool): will be passed to subplots

    Returns:
        tuple[Figure, np.ndarray]: the figure and the array of axes (always
            two-dimensional, since `squeeze=False` is used)
    """
    # try to make the plots as square as possible
    square_side = int(np.round(np.sqrt(n_features)))
    ncols = min(square_side, max_cols)
    nrows = int(np.ceil(n_features / ncols))
    grid_kwargs = dict(
        nrows=nrows,
        ncols=ncols,
        figsize=fig_size,
        sharex=sharex,
        sharey=sharey,
        squeeze=False,
    )
    fig, axes = plt.subplots(**grid_kwargs)
    return fig, axes
def _add_diagonal(
    ax, fraction=0.04, label="x=y", color="grey", linestyle="--", zorder=1
):
    """Draw the x=y line over the range shared by the x-axis and the y-axis.

    If the shared range is shorter than `fraction` of an axis range, the limits
    of the other axis are widened (by `fraction` of their range per iteration)
    until it is not, and only then the line is drawn.

    Args:
        ax (plt.Axes): The axes to draw on; its limits may be modified.
        fraction (float): minimal length of the diagonal, relative to the axes
            ranges, and the relative step by which limits are widened.
        label (str): legend label of the line.
        color (str): color of the line.
        linestyle (str): line style of the line.
        zorder (int): z-order of the line.
    """

    def _shared_range():
        # The part of the value range covered by both axes.
        return [
            max(ax.get_xlim()[0], ax.get_ylim()[0]),
            min(ax.get_xlim()[1], ax.get_ylim()[1]),
        ]

    def _minimal_lengths():
        # `fraction` of the x-range (entry 0) and of the y-range (entry 1).
        return np.array([np.diff(ax.get_xlim()), np.diff(ax.get_ylim())]) * fraction

    widen_by = [-fraction / 2, fraction / 2]

    diagonal = _shared_range()
    lim_range_frac = _minimal_lengths()
    while True:
        diagonal_length = np.abs(np.diff(diagonal)[0])
        if not np.any(diagonal_length < lim_range_frac):
            break
        if diagonal_length < lim_range_frac[0]:
            ax.set_ylim(*(ax.get_ylim() + np.diff(ax.get_ylim()) * widen_by))
        if diagonal_length < lim_range_frac[1]:
            ax.set_xlim(*(ax.get_xlim() + np.diff(ax.get_xlim()) * widen_by))
        diagonal = _shared_range()
        lim_range_frac = _minimal_lengths()

    # plot diagonal
    ax.plot(
        diagonal, diagonal, color=color, label=label, linestyle=linestyle, zorder=zorder
    )


# Plots that apply to both weight models and outcome models.
SHARED_PLOTS = {
    "roc_curve": plot_roc_curve_folds,
    "pr_curve": plot_precision_recall_curve_folds,
    "calibration": plot_calibration_folds,
}

WEIGHT_PLOTS = {
    "weight_distribution": plot_propensity_score_distribution_folds,
    "covariate_balance_love": plot_mean_features_imbalance_love_folds,
    "covariate_balance_slope": plot_mean_features_imbalance_slope_folds,
    **SHARED_PLOTS,
}

OUTCOME_PLOTS = {
    "continuous_accuracy": plot_continuous_prediction_accuracy_folds,
    "residuals": plot_residual_folds,
    "common_support": plot_counterfactual_common_support_folds,
    **SHARED_PLOTS,
}