"""
---------------------------------------------------------------------------
Created on Fri April 10 10:32:32 2023
----------------------------------------------------------------------------
**Title:** ValidPath Toolbox - Uncertainty Analysis module
**Description:** This is the Uncertainty Analysis module of the ValidPath toolbox. It includes the Uncertainty_Analysis class and several methods
**Classes:** Uncertainty_Analysis
**Methods:** get_report, auc_keras_, ci_, Delong_CI, compute_midrank, compute_midrank_weight, calc_pvalue, compute_ground_truth_statistics, delong_roc_variance, bootstrapping
---------------------------------------------------------------------------
Author: SeyedM.MousaviKahaki (seyed.kahaki@fda.hhs.gov)
Version ='1.0'
---------------------------------------------------------------------------
"""
import numpy as np
from sklearn.metrics import roc_auc_score
import scipy.stats
from scipy import stats
import os, os.path
import pandas as pd
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score
from scipy.stats import norm
from sklearn.metrics import auc
import matplotlib.pyplot as plt
[docs]
class Uncertainty_Analysis:
    """Uncertainty-analysis utilities for binary-classifier outputs.

    Behavior is configured through instance attributes:
      perform_Delong    -- when True, get_report also computes the DeLong AUC CI
      perform_Bootstrap -- when True, get_report also bootstraps the AUC
      plot_roc          -- when True, get_report draws the ROC curve
      tag               -- label printed in the report header
      n_bootstraps      -- number of bootstrap resamples
    """

    def __init__(self):
        # Optional analyses are off by default; callers flip these flags
        # before invoking get_report.
        self.perform_Delong = False
        self.perform_Bootstrap = False
        self.plot_roc = False
        self.tag = 'My Results'
        self.n_bootstraps = 1000
[docs]
def get_report(self, y_pred, y_truth):
    """Compute and print a performance report for binary predictions.

    Main entry point of Uncertainty_Analysis: builds a labeled confusion
    matrix, precision/recall with Wald confidence intervals and the ROC
    curve; depending on the instance flags ``perform_Delong``,
    ``perform_Bootstrap`` and ``plot_roc`` it additionally computes DeLong
    AUC statistics, a bootstrap CI and an ROC plot.

    :Parameters:
        y_truth: ground_truth - np.array of 0 and 1
        y_pred: predictions - np.array of floats of the probability of being class 1
    :Returns:
        (precision, precision CI, recall, recall CI, DeLong AUC, DeLong AUC
        covariance, DeLong AUC CI, false-positive rates, true-positive rates,
        AUC, confusion-matrix DataFrame)
    """
    # .round() thresholds the probabilities at 0.5 to obtain hard labels.
    # labels=[1, 0] puts the positive class in the first row/column.
    cmtx = pd.DataFrame(
        confusion_matrix(y_truth.round(), y_pred.round(), labels=[1, 0]),
        index=['true:yes', 'true:no'],
        columns=['pred:yes', 'pred:no']
    )
    ############### Precision
    precision = precision_score(y_truth.round(), y_pred.round())
    ############### Recall
    recall = recall_score(y_truth.round(), y_pred.round())
    # ############### F1 score
    # Computed but intentionally not printed or returned.
    f1_score1 = f1_score(y_truth.round(), y_pred.round())
    # ############### Cohen's kappa
    # Computed but intentionally not printed or returned.
    cohen_kappa_score1 = cohen_kappa_score(y_truth.round(), y_pred.round())
    # Second confusion matrix uses sklearn's default label order:
    # row/col 0 = class 0, row/col 1 = class 1.
    confusion_m = confusion_matrix(y_truth.round(), y_pred.round())
    TP = confusion_m[1][1]
    FP = confusion_m[0][1]
    TN = confusion_m[0][0]
    FN = confusion_m[1][0]
    # Wald CI for precision: TP successes out of predicted positives.
    n = TP+FP
    Precision_CI = self.ci_(TP, n, alpha=0.05)
    # Wald CI for recall: TP successes out of actual positives.
    n = TP+FN
    Recall_CI = self.ci_(TP, n, alpha=0.05)
    # thresholds_keras is unused; roc_curve consumes the raw probabilities.
    fpr_keras, tpr_keras, thresholds_keras = roc_curve(y_truth, y_pred)
    # NOTE(review): confirm auc_keras_ actually returns the computed AUC
    # before relying on auc_keras downstream.
    auc_keras = self.auc_keras_(fpr_keras, tpr_keras)
    if self.perform_Delong:
        auc_delong, ci_delong, lower_upper_q, auc_delong_cov, auc_std = self.Delong_CI(y_pred, y_truth)
    else:
        # Empty-list placeholders keep the returned tuple's shape stable
        # when the DeLong analysis is skipped.
        auc_delong = ci_delong = lower_upper_q = auc_delong_cov = auc_std = []
    ########## Print All Together
    print('####################')
    print("Results for "+self.tag)
    print(cmtx)
    print("Precision: ", precision)
    print("Precision_CI: ", Precision_CI)
    print("Recall: ", recall)
    print("Recall_CI: ", Recall_CI)
    if self.perform_Delong:
        print("Delong Method")
        print('AUC:', auc_delong)
        print('AUC COV:', auc_delong_cov)
        print('95% AUC CI:', ci_delong)
    # ### Bootstrap
    if self.perform_Bootstrap:
        bootstrapped_scores, confidence_lower, confidence_upper = self.bootstrapping(y_truth, y_pred)
    else:
        # Empty-dict placeholders when bootstrapping is skipped; note these
        # are not part of the returned tuple either way.
        bootstrapped_scores = confidence_lower = confidence_upper = {}
    if self.plot_roc == True:
        # ### AUC Plot
        plt.figure(1)
        plt.plot([0, 1], [0, 1], 'k--')
        plt.plot(fpr_keras, tpr_keras, label='keras (AUC = {:.3f})'.format(auc_keras))
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')
        plt.title('ROC curve')
        plt.legend(loc='best')
        plt.show()
    return (precision, Precision_CI, recall, Recall_CI, auc_delong, auc_delong_cov, ci_delong, fpr_keras, tpr_keras, auc_keras, cmtx)
[docs]
def auc_keras_(self, fpr_keras, tpr_keras):
""" Estimates confidence interval for Bernoulli p
:Parameters:
fpr_keras: False Positive Rate Values
tpr_keras: True Positive Rate Values
:Returns:
AUC: Area Under the ROC Curve
"""
auc_keras = auc(fpr_keras, tpr_keras)
#return auc_keras
[docs]
def ci_(self, tp, n, alpha=0.05):
""" Estimates confidence interval for Bernoulli p
:Parameters:
tp: number of positive outcomes, TP in this case
n: number of attemps, TP+FP for Precision, TP+FN for Recall
alpha: confidence level
:Returns:
Tuple[float, float]: lower and upper bounds of the confidence interval
"""
p_hat = float(tp) / n
z_score = norm.isf(alpha * 0.5) # two sides, so alpha/2 on each side
variance_of_sum = p_hat * (1-p_hat) / n
std = variance_of_sum ** 0.5
return p_hat - z_score * std, p_hat + z_score * std
[docs]
def Delong_CI(self, y_pred ,y_truth):
""" A Python implementation of an algorithm for computing the statistical significance of comparing two sets of predictions by ROC AUC.
Also can compute variance of a single ROC AUC estimate. X. Sun and W. Xu, "Fast Implementation of DeLong’s Algorithm for Comparing the
Areas Under Correlated Receiver Operating Characteristic Curves," in IEEE Signal Processing Letters, vol. 21, no. 11, pp. 1389-1393, Nov. 2014,
doi: 10.1109/LSP.2014.2337313.
:Parameters:
y_truth: ground_truth - np.array of 0 and 1
y_pred: predictions - np.array of floats of the probability of being class 1
:Returns:
auc, ci, lower_upper_q, auc_cov, auc_std
"""
alpha = .95
y_true = np.array(y_truth)
auc, auc_cov = self.delong_roc_variance(
y_true,
y_pred)
auc_std = np.sqrt(auc_cov)
lower_upper_q = np.abs(np.array([0, 1]) - (1 - alpha) / 2)
ci = stats.norm.ppf(
lower_upper_q,
loc=auc,
scale=auc_std)
ci[ci > 1] = 1
return auc, ci, lower_upper_q, auc_cov, auc_std
# stackoverflow.com/questions/19124239/scikit-learn-roc-curve-with-confidence-intervals
[docs]
def compute_midrank(self,x):
"""Computes midranks.
:Parameters:
x - a 1D numpy array
:Returns:
array of midranks
"""
J = np.argsort(x)
Z = x[J]
N = len(x)
T = np.zeros(N, dtype=float)
i = 0
while i < N:
j = i
while j < N and Z[j] == Z[i]:
j += 1
T[i:j] = 0.5*(i + j - 1)
i = j
T2 = np.empty(N, dtype=float)
# Note(kazeevn) +1 is due to Python using 0-based indexing
# instead of 1-based in the AUC formula in the paper
T2[J] = T + 1
return T2
[docs]
def compute_midrank_weight(self, x, sample_weight):
"""Computes midranks.
:Parameters:
x - a 1D numpy array
:Returns:
array of midranks
"""
J = np.argsort(x)
Z = x[J]
cumulative_weight = np.cumsum(sample_weight[J])
N = len(x)
T = np.zeros(N, dtype=float)
i = 0
while i < N:
j = i
while j < N and Z[j] == Z[i]:
j += 1
T[i:j] = cumulative_weight[i:j].mean()
i = j
T2 = np.empty(N, dtype=float)
T2[J] = T
return T2
[docs]
def fastDeLong(self, predictions_sorted_transposed, label_1_count, sample_weight):
if sample_weight is None:
return self.fastDeLong_no_weights(predictions_sorted_transposed, label_1_count)
else:
return self.fastDeLong_weights(predictions_sorted_transposed, label_1_count, sample_weight)
[docs]
def fastDeLong_weights(self, predictions_sorted_transposed, label_1_count, sample_weight):
    """
    The fast version of DeLong's method for computing the covariance of
    unadjusted AUC, generalized to per-example sample weights.

    :Parameters:
        predictions_sorted_transposed: a 2D numpy.array[n_classifiers, n_examples]
            sorted such as the examples with label "1" are first
        label_1_count: number of positive examples (the leading columns)
        sample_weight: 1D np.array of per-example weights, aligned with the
            columns of predictions_sorted_transposed
    :Returns:
        (AUC value, DeLong covariance)
    :Reference:
     @article{sun2014fast,
       title={Fast Implementation of DeLong's Algorithm for Comparing the Areas Under Correlated Receiver Operating Characteristic Curves},
       author={Xu Sun and Weichao Xu},
       journal={IEEE Signal Processing Letters},
       volume={21},
       number={11},
       pages={1389--1393},
       year={2014},
       publisher={IEEE}
     }
    """
    # Short variables are named as they are in the paper:
    # m = number of positives, n = number of negatives, k = classifiers.
    m = label_1_count
    n = predictions_sorted_transposed.shape[1] - m
    positive_examples = predictions_sorted_transposed[:, :m]
    negative_examples = predictions_sorted_transposed[:, m:]
    k = predictions_sorted_transposed.shape[0]
    # Weighted midranks within the positives (tx), within the negatives (ty),
    # and over the pooled sample (tz), computed per classifier row.
    tx = np.empty([k, m], dtype=float)
    ty = np.empty([k, n], dtype=float)
    tz = np.empty([k, m + n], dtype=float)
    for r in range(k):
        tx[r, :] = self.compute_midrank_weight(positive_examples[r, :], sample_weight[:m])
        ty[r, :] = self.compute_midrank_weight(negative_examples[r, :], sample_weight[m:])
        tz[r, :] = self.compute_midrank_weight(predictions_sorted_transposed[r, :], sample_weight)
    total_positive_weights = sample_weight[:m].sum()
    total_negative_weights = sample_weight[m:].sum()
    # Outer product of positive and negative weights: one entry per
    # positive/negative pair; its sum normalizes the weighted AUC.
    pair_weights = np.dot(sample_weight[:m, np.newaxis], sample_weight[np.newaxis, m:])
    total_pair_weights = pair_weights.sum()
    aucs = (sample_weight[:m]*(tz[:, :m] - tx)).sum(axis=1) / total_pair_weights
    # Structural components V01/V10 of the DeLong covariance estimator.
    v01 = (tz[:, :m] - tx[:, :]) / total_negative_weights
    v10 = 1. - (tz[:, m:] - ty[:, :]) / total_positive_weights
    sx = np.cov(v01)
    sy = np.cov(v10)
    delongcov = sx / m + sy / n
    return aucs, delongcov
[docs]
def fastDeLong_no_weights(self, predictions_sorted_transposed, label_1_count):
"""
The fast version of DeLong's method for computing the covariance of
unadjusted AUC.
:Parameters:
predictions_sorted_transposed: a 2D numpy.array[n_classifiers, n_examples]
sorted such as the examples with label "1" are first
:Returns:
(AUC value, DeLong covariance)
Reference:
@article{sun2014fast,
title={Fast Implementation of DeLong's Algorithm for Comparing the Areas Under Correlated Receiver Oerating Characteristic Curves},
author={Xu Sun and Weichao Xu},
journal={IEEE Signal Processing Letters},
volume={21},
number={11},
pages={1389--1393},
year={2014},
publisher={IEEE}
}
"""
# Short variables are named as they are in the paper
m = label_1_count
n = predictions_sorted_transposed.shape[1] - m
positive_examples = predictions_sorted_transposed[:, :m]
negative_examples = predictions_sorted_transposed[:, m:]
k = predictions_sorted_transposed.shape[0]
tx = np.empty([k, m], dtype=float)
ty = np.empty([k, n], dtype=float)
tz = np.empty([k, m + n], dtype=float)
for r in range(k):
tx[r, :] = self.compute_midrank(positive_examples[r, :])
ty[r, :] = self.compute_midrank(negative_examples[r, :])
tz[r, :] = self.compute_midrank(predictions_sorted_transposed[r, :])
aucs = tz[:, :m].sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n
v01 = (tz[:, :m] - tx[:, :]) / n
v10 = 1.0 - (tz[:, m:] - ty[:, :]) / m
sx = np.cov(v01)
sy = np.cov(v10)
delongcov = sx / m + sy / n
return aucs, delongcov
[docs]
def calc_pvalue(self, aucs, sigma):
"""Computes log(10) of p-values.
:Parameters:
aucs: 1D array of AUCs
sigma: AUC DeLong covariances
:Returns:
log10(pvalue)
"""
l = np.array([[1, -1]])
z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(l, sigma), l.T))
return np.log10(2) + scipy.stats.norm.logsf(z, loc=0, scale=1) / np.log(10)
[docs]
def compute_ground_truth_statistics(self, ground_truth, sample_weight):
assert np.array_equal(np.unique(ground_truth), [0, 1])
order = (-ground_truth).argsort()
label_1_count = int(ground_truth.sum())
if sample_weight is None:
ordered_sample_weight = None
else:
ordered_sample_weight = sample_weight[order]
return order, label_1_count, ordered_sample_weight
[docs]
def delong_roc_variance(self, ground_truth, predictions, sample_weight=None):
"""
Computes ROC AUC variance for a single set of predictions
:Parameters:
ground_truth: np.array of 0 and 1
predictions: np.array of floats of the probability of being class 1
"""
order, label_1_count, ordered_sample_weight = self.compute_ground_truth_statistics(
ground_truth, sample_weight)
predictions_sorted_transposed = predictions[np.newaxis, order]
aucs, delongcov = self.fastDeLong(predictions_sorted_transposed, label_1_count, ordered_sample_weight)
assert len(aucs) == 1
return aucs[0], delongcov
[docs]
def bootstrapping(self, y_true, y_pred):
    """
    Bootstrap a confidence interval for the ROC AUC.

    Resamples prediction indices with replacement ``self.n_bootstraps``
    times, recomputes the ROC AUC on each resample, plots a histogram of
    the scores, and prints and returns the empirical confidence bounds.

    :Parameters:
        y_true: ground truth - np.array of 0 and 1
        y_pred: predictions - np.array of floats of the probability of being class 1
          (assumes both support numpy fancy indexing - TODO confirm callers
          never pass plain lists)
    :Returns:
        (bootstrapped_scores, confidence_lower, confidence_upper)
    """
    print("Original ROC area: {:0.3f}".format(roc_auc_score(y_true, y_pred)))
    rng_seed = 42  # fixed seed: control reproducibility of the resampling
    bootstrapped_scores = []
    rng = np.random.RandomState(rng_seed)
    for i in range(self.n_bootstraps):
        # bootstrap by sampling with replacement on the prediction indices
        indices = rng.randint(0, len(y_pred), len(y_pred))
        if len(np.unique(y_true[indices])) < 2:
            # We need at least one positive and one negative sample for ROC AUC
            # to be defined: reject the sample
            continue
        score = roc_auc_score(y_true[indices], y_pred[indices])
        bootstrapped_scores.append(score)
        print("Bootstrap #{} ROC area: {:0.3f}".format(i + 1, score))
    # Histogram of the resampled AUCs (always shown, regardless of plot_roc).
    plt.hist(bootstrapped_scores, bins=50)
    plt.title('Histogram of the bootstrapped ROC AUC scores')
    plt.show()
    sorted_scores = np.array(bootstrapped_scores)
    sorted_scores.sort()
    # Computing the lower and upper bound of the 90% confidence interval
    # You can change the bounds percentiles to 0.025 and 0.975 to get
    # a 95% confidence interval instead.
    # NOTE(review): indexing fails if every resample was rejected
    # (empty sorted_scores) - verify inputs contain both classes.
    confidence_lower = sorted_scores[int(0.05 * len(sorted_scores))]
    confidence_upper = sorted_scores[int(0.95 * len(sorted_scores))]
    print("Bootstrap")
    print("Confidence interval for the score: [{:0.3f} - {:0.3}]".format(
        confidence_lower, confidence_upper))
    return bootstrapped_scores, confidence_lower, confidence_upper