Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ name: Unit Tests

on:
push:
branches: [ main, docs_tests ] # run when anything is pushed to these branches
branches:
- main
- docs_tests # run when anything is pushed to these branches
pull_request:
branches: [ main ] # run for the code submitted as a PR to these branches

Expand Down Expand Up @@ -61,7 +63,6 @@ jobs:
fail_ci_if_error: true
verbose: true
version: "v0.1.15"
codecov:
token: ${{ secrets.CODECOV_TOKEN }}

build_docs:
Expand Down
85 changes: 56 additions & 29 deletions MetricsReloaded/metrics/calibration_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@
import warnings
# from metrics.pairwise_measures import CacheFunctionOutput
from MetricsReloaded.utility.utils import (
CacheFunctionOutput,
max_x_at_y_more,
max_x_at_y_less,
min_x_at_y_more,
min_x_at_y_less,
trapezoidal_integration,
one_hot_encode,
median_heuristic
)
Expand All @@ -51,6 +45,25 @@


class CalibrationMeasures(object):
"""
Class allowing the derivation of calibration measures given probability input:
The possible metrics are:

* expected calibration error (ece)
* Brier Score
* Root Brier score
* Logarithmic score
* Class wise expectation calibration error
* Kernel based ECE
* negative log likelihood

:param pred_proba: predicted probabilities
:param ref: reference
:param case: if required list of cases to consider
:param measures: list of measures to extract
:param empty: flag indicating whether there are empty references
:param dict_args: dictionary with additional arguments for the metrics if needed
"""
def __init__(
self,
pred_proba,
Expand Down Expand Up @@ -89,7 +102,7 @@ def class_wise_expectation_calibration_error(self):

.. math::

cwECE = \dfrac{1}{K}\sum_{k=1}^{K}\sum_{i=1}^{N}\dfrac{\vert B_{i,k} \vert}{N} \left(y_{k}(B_{i,k}) - p_{k}(B_{i,k})\right)
cwECE = \\dfrac{1}{K}\\sum_{k=1}^{K}\\sum_{i=1}^{N}\\dfrac{\\vert B_{i,k} \\vert}{N} \\left(y_{k}(B_{i,k}) - p_{k}(B_{i,k})\\right)

Comment on lines +105 to 106
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
ruff check MetricsReloaded/metrics/calibration_measures.py --select W605 -n

Repository: Project-MONAI/MetricsReloaded

Length of output: 22957


Resolve W605 invalid escape sequences in formula docstrings.

The math expressions in docstrings contain unescaped backslashes. Convert to raw strings (prefix with r) or escape backslashes (e.g., \dfrac → \\dfrac) to resolve these violations.

Also applies to: 167-168, 214-215, 262, 282, 295-296, 406, 444, 469, 501-502

🧰 Tools
🪛 Ruff (0.15.9)

[warning] 111-111: Invalid escape sequence: \s

Add backslash to escape sequence

(W605)


[warning] 111-111: Invalid escape sequence: \s

Add backslash to escape sequence

(W605)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@MetricsReloaded/metrics/calibration_measures.py` around lines 111 - 112,
Several docstrings in MetricsReloaded/metrics/calibration_measures.py contain
LaTeX math with unescaped backslashes (e.g., the cwECE formula), causing W605
errors; fix by making those docstrings raw strings (prefix with r) or escaping
backslashes (e.g., \\dfrac → \\\\dfrac). Update the docstrings that include the
formulas referenced around the cwECE expression and the other occurrences noted
(lines near 167-168, 214-215, 262, 282, 295-296, 406, 444, 469, 501-502) so the
functions/classes that contain them (search for the docstrings in
calibration_measures.py and identifiers like cwECE) use r"..." or properly
escaped backslashes throughout. Ensure you change only the docstring delimiters
and escapes, not the formula text itself.

:return: cwece
"""
Expand All @@ -103,14 +116,14 @@ def class_wise_expectation_calibration_error(self):
range_values = np.arange(0, 1.00001, step)
list_values = []
numb_samples = self.pred.shape[0]
class_pred = np.argmax(self.pred, 1)
#class_pred = np.argmax(self.pred, 1)
n_classes = self.pred.shape[1]
for k in range(n_classes):
list_values_k = []
for (l, u) in zip(range_values[:-1], range_values[1:]):
for (lo, up) in zip(range_values[:-1], range_values[1:]):
pred_k = self.pred[:, k]
ref_tmp = np.where(
np.logical_and(pred_k > l, pred_k <= u),
np.logical_and(pred_k > lo, pred_k <= up),
self.ref,
np.ones_like(self.ref) * -1,
)
Expand All @@ -121,7 +134,7 @@ def class_wise_expectation_calibration_error(self):
nsamples = np.size(ref_sel)
prop = np.sum(ref_selk) / nsamples
pred_tmp = np.where(
np.logical_and(pred_k > l, pred_k <= u),
np.logical_and(pred_k > lo, pred_k <= up),
pred_k,
np.ones_like(pred_k) * -1,
)
Expand All @@ -146,7 +159,7 @@ def expectation_calibration_error(self):

.. math::

ECE = \sum_{m=1}^{M} \dfrac{|B_m|}{n}(\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_ik==ref_ik)-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i)
ECE = \sum_{m=1}^{M} \dfrac{|B_m|}{n}(\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_{ik}==ref_{ik})-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i)

:return: ece

Expand All @@ -161,17 +174,17 @@ def expectation_calibration_error(self):
list_values = []
numb_samples = 0
pred_prob = self.pred[:,1]
for (l, u) in zip(range_values[:-1], range_values[1:]):
for (lo, up) in zip(range_values[:-1], range_values[1:]):
ref_tmp = np.where(
np.logical_and(pred_prob > l, pred_prob <= u),
np.logical_and(pred_prob > lo, pred_prob <= up),
self.ref,
np.ones_like(self.ref) * -1,
)
ref_sel = ref_tmp[ref_tmp > -1]
nsamples = np.size(ref_sel)
prop = np.sum(ref_sel) / nsamples
pred_tmp = np.where(
np.logical_and(pred_prob > l, pred_prob <= u),
np.logical_and(pred_prob > lo, pred_prob <= up),
pred_prob,
np.ones_like(pred_prob) * -1,
)
Expand All @@ -193,7 +206,7 @@ def maximum_calibration_error(self):

.. math::

MCE = max(|\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_ik==ref_ik)-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i|)
MCE = max(|\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_{ik}==ref_{ik})-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i|)

:return: mce

Expand All @@ -206,19 +219,18 @@ def maximum_calibration_error(self):
step = 1.0 / nbins
range_values = np.arange(0, 1.00001, step)
list_values = []
numb_samples = 0
pred_prob = self.pred[:,1]
for (l, u) in zip(range_values[:-1], range_values[1:]):
for (lo, up) in zip(range_values[:-1], range_values[1:]):
ref_tmp = np.where(
np.logical_and(pred_prob > l, pred_prob <= u),
np.logical_and(pred_prob > lo, pred_prob <= up),
self.ref,
np.ones_like(self.ref) * -1,
)
ref_sel = ref_tmp[ref_tmp > -1]
nsamples = np.size(ref_sel)
prop = np.sum(ref_sel) / nsamples
pred_tmp = np.where(
np.logical_and(pred_prob > l, pred_prob <= u),
np.logical_and(pred_prob > lo, pred_prob <= up),
pred_prob,
np.ones_like(pred_prob) * -1,
)
Expand Down Expand Up @@ -274,13 +286,12 @@ def logarithmic_score(self):

.. math::

LS = 1/N\sum_{i=1}^{N}\log{pred_ik}ref_{ik}
LS = 1/N\sum_{i=1}^{N}\log{pred_{ik}}ref_{ik}

:return: ls
"""
eps = 1e-10
log_pred = np.log(self.pred + eps)
to_log = self.pred[np.arange(log_pred.shape[0]),self.ref]
to_sum = log_pred[np.arange(log_pred.shape[0]),self.ref]
ls = np.mean(to_sum)
return ls
Expand All @@ -289,6 +300,9 @@ def distance_ij(self,i,j):
"""
Determines the euclidean distance between two vectors of prediction for two samples i and j

:param i: index of first sample
:param j: index of second sample with which to calculate distance

:return: distance
"""
pred_i = self.pred[i,:]
Expand All @@ -299,7 +313,10 @@ def distance_ij(self,i,j):

def kernel_calculation(self, i,j):
"""
Defines the kernel value for two samples i and j with the following definition for k(x_i,x_j)
Defines the kernel value for two samples i and j with the following definition for :math:`k(x_i,x_j)`

:param i: index of first sample
:param j: index of second sample

.. math::

Expand Down Expand Up @@ -414,13 +431,16 @@ def gamma_ik(self, i, k):
"""
Definition of gamma value for sample i class k of the predictions

:param i: index of the sample
:param k: index of the class

.. math::

gamma_{ik} = \Gamma(pred_{ik}/h + 1)
\gamma_{ik} = \Gamma(pred_{ik}/h + 1)

where h is the bandwidth value set as default to 0.5

:return gamma_ik
:return: gamma_ik

"""
pred_ik = self.pred[i, k]
Expand All @@ -436,6 +456,9 @@ def dirichlet_kernel(self, j, i):
"""
Calculation of Dirichlet kernel value for predictions of samples i and j

:param i: index of first sample to consider
:param j: index of second sample to consider

.. math::

k_{Dir}(x_j,x_i) = \\dfrac{\\Gamma(\\sum_{k=1}^{K}\\alpha_{ik})}{\\prod_{k=1}^{K}\\Gamma(\\alpha_{ik})}\\prod_{k=1}^{K}x_{jk}^{\\alpha_{ik}-1}
Expand Down Expand Up @@ -470,10 +493,10 @@ def negative_log_likelihood(self):

.. math::

NLL = -\dfrac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{C} y_{ik} \dot log(p_{i,k})
NLL = -\dfrac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{C} y_{ik}\log(p_{i,k})

where :math: `y_{ik}` the outcome is 1 if the class of :math: `y_{i}` is k and :math: `p_{ik}` is the predicted
probability for sample :math: `x_i` and class k
where :math:`y_{ik}` the outcome is 1 if the class of :math:`y_{i}` is k and :math:`p_{ik}` is the predicted
probability for sample :math:`x_i` and class k

:return: NLL

Expand All @@ -485,7 +508,11 @@ def negative_log_likelihood(self):
return nll

def to_dict_meas(self, fmt="{:.4f}"):
"""Given the selected metrics provides a dictionary with relevant metrics"""
"""
Given the selected metrics provides a dictionary with relevant metrics

:return: result_dict dictionary of results
"""
result_dict = {}
for key in self.measures:
result = self.measures_dict[key][0]()
Expand Down
Loading
Loading