# coding: utf-8
"""Metrics to assess performance on classification task given class prediction
Functions named as ``*_score`` return a scalar value to maximize: the higher
the better
Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
the lower the better
"""
# Authors: Guillaume Lemaitre <g.lemaitre58@gmail.com>
# Dariusz Brzezinski
# License: MIT
from __future__ import division
import warnings
import logging
import functools
from inspect import getcallargs
import numpy as np
import scipy as sp
from sklearn.metrics.classification import (_check_targets, _prf_divide,
precision_recall_fscore_support)
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.multiclass import unique_labels
try:
from inspect import signature
except ImportError:
from sklearn.externals.funcsigs import signature
LOGGER = logging.getLogger(__name__)
def sensitivity_specificity_support(y_true,
y_pred,
labels=None,
pos_label=1,
average=None,
warn_for=('sensitivity', 'specificity'),
sample_weight=None):
"""Compute sensitivity, specificity, and support for each class
    The sensitivity is the ratio ``tp / (tp + fn)`` where ``tp`` is the number
    of true positives and ``fn`` the number of false negatives. The
    sensitivity quantifies the ability to avoid false negatives [1]_.

    The specificity is the ratio ``tn / (tn + fp)`` where ``tn`` is the number
    of true negatives and ``fp`` the number of false positives. The
    specificity quantifies the ability to avoid false positives [1]_.

    The support is the number of occurrences of each class in ``y_true``.

    If ``pos_label is None`` and the targets are binary, this function
    returns the average sensitivity and specificity if ``average`` is
    ``'weighted'``.
Parameters
----------
y_true : ndarray, shape (n_samples, )
Ground truth (correct) target values.
y_pred : ndarray, shape (n_samples, )
Estimated targets as returned by a classifier.
labels : list, optional
The set of labels to include when ``average != 'binary'``, and their
order if ``average is None``. Labels present in the data can be
excluded, for example to calculate a multiclass average ignoring a
majority negative class, while labels not present in the data will
        result in 0 components in a macro average. By default, all labels in
        ``y_true`` and ``y_pred`` are used in sorted order.
pos_label : str or int, optional (default=1)
The class to report if ``average='binary'`` and the data is binary.
If the data are multiclass, this will be ignored;
setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
scores for that label only.
average : str or None, optional (default=None)
If ``None``, the scores for each class are returned. Otherwise, this
determines the type of averaging performed on the data:
``'binary'``:
Only report results for the class specified by ``pos_label``.
This is applicable only if targets (``y_{true,pred}``) are binary.
``'micro'``:
Calculate metrics globally by counting the total true positives,
false negatives and false positives.
``'macro'``:
Calculate metrics for each label, and find their unweighted
mean. This does not take label imbalance into account.
``'weighted'``:
Calculate metrics for each label, and find their average, weighted
by support (the number of true instances for each label). This
alters 'macro' to account for label imbalance; it can result in an
F-score that is not between precision and recall.
``'samples'``:
Calculate metrics for each instance, and find their average (only
meaningful for multilabel classification where this differs from
:func:`accuracy_score`).
warn_for : tuple or set, for internal use
This determines which warnings will be made in the case that this
function is being used to return only one of its metrics.
sample_weight : ndarray, shape (n_samples, )
Sample weights.
Returns
-------
    sensitivity : float (if ``average`` is not None) or ndarray, \
        shape (n_unique_labels, )

    specificity : float (if ``average`` is not None) or ndarray, \
        shape (n_unique_labels, )

    support : int (if ``average`` is not None) or ndarray, \
        shape (n_unique_labels, )
        The number of occurrences of each label in ``y_true``.
Examples
--------
>>> import numpy as np
>>> from imblearn.metrics import sensitivity_specificity_support
>>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
>>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
>>> sensitivity_specificity_support(y_true, y_pred, average='macro')
(0.33333333333333331, 0.66666666666666663, None)
>>> sensitivity_specificity_support(y_true, y_pred, average='micro')
(0.33333333333333331, 0.66666666666666663, None)
>>> sensitivity_specificity_support(y_true, y_pred, average='weighted')
(0.33333333333333331, 0.66666666666666663, None)
References
----------
.. [1] `Wikipedia entry for the Sensitivity and specificity
<https://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
"""
average_options = (None, 'micro', 'macro', 'weighted', 'samples')
if average not in average_options and average != 'binary':
raise ValueError('average has to be one of ' + str(average_options))
y_type, y_true, y_pred = _check_targets(y_true, y_pred)
present_labels = unique_labels(y_true, y_pred)
if average == 'binary':
if y_type == 'binary':
if pos_label not in present_labels:
if len(present_labels) < 2:
# Only negative labels
return (0., 0., 0)
else:
raise ValueError("pos_label=%r is not a valid label: %r" %
(pos_label, present_labels))
labels = [pos_label]
else:
raise ValueError("Target is %s but average='binary'. Please "
"choose another average setting." % y_type)
elif pos_label not in (None, 1):
warnings.warn("Note that pos_label (set to %r) is ignored when "
"average != 'binary' (got %r). You may use "
"labels=[pos_label] to specify a single positive class."
% (pos_label, average), UserWarning)
if labels is None:
labels = present_labels
n_labels = None
else:
n_labels = len(labels)
labels = np.hstack(
[labels, np.setdiff1d(
present_labels, labels, assume_unique=True)])
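        # Any labels present in the data but not requested are appended so
        # that the LabelEncoder below can encode every observed value; only
        # the first ``n_labels`` entries are reported back to the caller.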
# Calculate tp_sum, pred_sum, true_sum ###
if y_type.startswith('multilabel'):
raise ValueError('imblearn does not support multilabel')
elif average == 'samples':
raise ValueError("Sample-based precision, recall, fscore is "
"not meaningful outside multilabel "
"classification. See the accuracy_score instead.")
else:
le = LabelEncoder()
le.fit(labels)
y_true = le.transform(y_true)
y_pred = le.transform(y_pred)
sorted_labels = le.classes_
# labels are now from 0 to len(labels) - 1 -> use bincount
tp = y_true == y_pred
tp_bins = y_true[tp]
if sample_weight is not None:
tp_bins_weights = np.asarray(sample_weight)[tp]
else:
tp_bins_weights = None
if len(tp_bins):
tp_sum = np.bincount(
tp_bins, weights=tp_bins_weights, minlength=len(labels))
else:
# Pathological case
true_sum = pred_sum = tp_sum = np.zeros(len(labels))
if len(y_pred):
pred_sum = np.bincount(
y_pred, weights=sample_weight, minlength=len(labels))
if len(y_true):
true_sum = np.bincount(
y_true, weights=sample_weight, minlength=len(labels))
# Compute the true negative
tn_sum = y_true.size - (pred_sum + true_sum - tp_sum)
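        # For each label, ``pred_sum`` counts tp + fp and ``true_sum`` counts
        # tp + fn, so ``pred_sum + true_sum - tp_sum`` equals tp + fp + fn and
        # subtracting it from the number of samples leaves tn. For example,
        # with 6 samples and tp=1, fp=1, fn=1 for a label, tn = 6 - 3 = 3.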
# Retain only selected labels
indices = np.searchsorted(sorted_labels, labels[:n_labels])
tp_sum = tp_sum[indices]
true_sum = true_sum[indices]
pred_sum = pred_sum[indices]
tn_sum = tn_sum[indices]
if average == 'micro':
tp_sum = np.array([tp_sum.sum()])
pred_sum = np.array([pred_sum.sum()])
true_sum = np.array([true_sum.sum()])
tn_sum = np.array([tn_sum.sum()])
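            # Micro-averaging pools the counts over all selected labels
            # before dividing, yielding a single global sensitivity and
            # specificity.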
# Finally, we have all our sufficient statistics. Divide! #
with np.errstate(divide='ignore', invalid='ignore'):
# Divide, and on zero-division, set scores to 0 and warn:
# Oddly, we may get an "invalid" rather than a "divide" error
# here.
specificity = _prf_divide(tn_sum, tn_sum + pred_sum - tp_sum,
'specificity', 'predicted', average,
warn_for)
sensitivity = _prf_divide(tp_sum, true_sum, 'sensitivity', 'true',
average, warn_for)
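        # Since ``tn_sum + pred_sum - tp_sum`` equals tn + fp and ``true_sum``
        # equals tp + fn, the ratios above are the per-label specificity
        # tn / (tn + fp) and sensitivity tp / (tp + fn).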
# Average the results
if average == 'weighted':
weights = true_sum
if weights.sum() == 0:
return 0, 0, None
elif average == 'samples':
weights = sample_weight
else:
weights = None
if average is not None:
assert average != 'binary' or len(specificity) == 1
specificity = np.average(specificity, weights=weights)
sensitivity = np.average(sensitivity, weights=weights)
true_sum = None # return no support
return sensitivity, specificity, true_sum
def sensitivity_score(y_true,
y_pred,
labels=None,
pos_label=1,
average='binary',
sample_weight=None):
"""Compute the sensitivity
The sensitivity is the ratio ``tp / (tp + fn)`` where ``tp`` is the number
of true positives and ``fn`` the number of false negatives. The sensitivity
quantifies the ability to avoid false negatives.
The best value is 1 and the worst value is 0.
Parameters
----------
y_true : ndarray, shape (n_samples, )
Ground truth (correct) target values.
y_pred : ndarray, shape (n_samples, )
Estimated targets as returned by a classifier.
labels : list, optional
The set of labels to include when ``average != 'binary'``, and their
order if ``average is None``. Labels present in the data can be
excluded, for example to calculate a multiclass average ignoring a
majority negative class, while labels not present in the data will
result in 0 components in a macro average.
pos_label : str or int, optional (default=1)
The class to report if ``average='binary'`` and the data is binary.
If the data are multiclass, this will be ignored;
setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
scores for that label only.
    average : str or None, optional (default='binary')
If ``None``, the scores for each class are returned. Otherwise, this
determines the type of averaging performed on the data:
``'binary'``:
Only report results for the class specified by ``pos_label``.
This is applicable only if targets (``y_{true,pred}``) are binary.
``'micro'``:
Calculate metrics globally by counting the total true positives,
false negatives and false positives.
``'macro'``:
Calculate metrics for each label, and find their unweighted
mean. This does not take label imbalance into account.
``'weighted'``:
Calculate metrics for each label, and find their average, weighted
by support (the number of true instances for each label). This
alters 'macro' to account for label imbalance; it can result in an
F-score that is not between precision and recall.
``'samples'``:
Calculate metrics for each instance, and find their average (only
meaningful for multilabel classification where this differs from
:func:`accuracy_score`).
sample_weight : ndarray, shape (n_samples, )
Sample weights.
Examples
--------
>>> import numpy as np
>>> from imblearn.metrics import sensitivity_score
>>> y_true = [0, 1, 2, 0, 1, 2]
>>> y_pred = [0, 2, 1, 0, 0, 1]
>>> sensitivity_score(y_true, y_pred, average='macro')
0.33333333333333331
>>> sensitivity_score(y_true, y_pred, average='micro')
0.33333333333333331
>>> sensitivity_score(y_true, y_pred, average='weighted')
0.33333333333333331
>>> sensitivity_score(y_true, y_pred, average=None)
array([ 1., 0., 0.])
    Returns
    -------
    sensitivity : float (if ``average`` is not None) or ndarray, \
        shape (n_unique_labels, )
    """
s, _, _ = sensitivity_specificity_support(
y_true,
y_pred,
labels=labels,
pos_label=pos_label,
average=average,
warn_for=('sensitivity', ),
sample_weight=sample_weight)
return s
def specificity_score(y_true,
y_pred,
labels=None,
pos_label=1,
average='binary',
sample_weight=None):
"""Compute the specificity
The specificity is the ratio ``tp / (tp + fn)`` where ``tp`` is the number
of true positives and ``fn`` the number of false negatives. The specificity
is intuitively the ability of the classifier to find all the positive
samples.
The best value is 1 and the worst value is 0.
Parameters
----------
y_true : ndarray, shape (n_samples, )
Ground truth (correct) target values.
y_pred : ndarray, shape (n_samples, )
Estimated targets as returned by a classifier.
labels : list, optional
The set of labels to include when ``average != 'binary'``, and their
order if ``average is None``. Labels present in the data can be
excluded, for example to calculate a multiclass average ignoring a
majority negative class, while labels not present in the data will
result in 0 components in a macro average.
pos_label : str or int, optional (default=1)
The class to report if ``average='binary'`` and the data is binary.
If the data are multiclass, this will be ignored;
setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
scores for that label only.
    average : str or None, optional (default='binary')
If ``None``, the scores for each class are returned. Otherwise, this
determines the type of averaging performed on the data:
``'binary'``:
Only report results for the class specified by ``pos_label``.
This is applicable only if targets (``y_{true,pred}``) are binary.
``'micro'``:
Calculate metrics globally by counting the total true positives,
false negatives and false positives.
``'macro'``:
Calculate metrics for each label, and find their unweighted
mean. This does not take label imbalance into account.
``'weighted'``:
Calculate metrics for each label, and find their average, weighted
by support (the number of true instances for each label). This
alters 'macro' to account for label imbalance; it can result in an
F-score that is not between precision and recall.
``'samples'``:
Calculate metrics for each instance, and find their average (only
meaningful for multilabel classification where this differs from
:func:`accuracy_score`).
sample_weight : ndarray, shape (n_samples, )
Sample weights.
Examples
--------
>>> import numpy as np
>>> from imblearn.metrics import specificity_score
>>> y_true = [0, 1, 2, 0, 1, 2]
>>> y_pred = [0, 2, 1, 0, 0, 1]
>>> specificity_score(y_true, y_pred, average='macro')
0.66666666666666663
>>> specificity_score(y_true, y_pred, average='micro')
0.66666666666666663
>>> specificity_score(y_true, y_pred, average='weighted')
0.66666666666666663
>>> specificity_score(y_true, y_pred, average=None)
array([ 0.75, 0.5 , 0.75])
Returns
-------
    specificity : float (if ``average`` is not None) or ndarray, \
        shape (n_unique_labels, )
"""
_, s, _ = sensitivity_specificity_support(
y_true,
y_pred,
labels=labels,
pos_label=pos_label,
average=average,
warn_for=('specificity', ),
sample_weight=sample_weight)
return s
def geometric_mean_score(y_true,
y_pred,
labels=None,
pos_label=1,
average='multiclass',
sample_weight=None,
correction=0.0):
"""Compute the geometric mean
The geometric mean (G-mean) is the root of the product of class-wise
sensitivity. This measure tries to maximize the accuracy on each of the
classes while keeping these accuracies balanced. For binary classification
    G-mean is the square root of the product of the sensitivity
and specificity. For multi-class problems it is a higher root of the
product of sensitivity for each class.
For compatibility with other imbalance performance measures, G-mean can be
calculated for each class separately on a one-vs-rest basis when
``average != 'multiclass'``.
    The best value is 1 and the worst value is 0. Traditionally, if at least
    one class is unrecognized by the classifier, G-mean resolves to zero. To
    alleviate this property, for highly multiclass problems the sensitivity
    of unrecognized classes can be "corrected" to a user-specified value
    (instead of zero). This option works only if ``average == 'multiclass'``.
Parameters
----------
y_true : ndarray, shape (n_samples, )
Ground truth (correct) target values.
y_pred : ndarray, shape (n_samples, )
Estimated targets as returned by a classifier.
labels : list, optional
The set of labels to include when ``average != 'binary'``, and their
order if ``average is None``. Labels present in the data can be
excluded, for example to calculate a multiclass average ignoring a
majority negative class, while labels not present in the data will
result in 0 components in a macro average.
pos_label : str or int, optional (default=1)
The class to report if ``average='binary'`` and the data is binary.
If the data are multiclass, this will be ignored;
setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
scores for that label only.
average : str or None, optional (default=``'multiclass'``)
If ``None``, the scores for each class are returned. Otherwise, this
determines the type of averaging performed on the data:
``'binary'``:
Only report results for the class specified by ``pos_label``.
This is applicable only if targets (``y_{true,pred}``) are binary.
``'micro'``:
Calculate metrics globally by counting the total true positives,
false negatives and false positives.
``'macro'``:
Calculate metrics for each label, and find their unweighted
mean. This does not take label imbalance into account.
``'weighted'``:
Calculate metrics for each label, and find their average, weighted
by support (the number of true instances for each label). This
alters 'macro' to account for label imbalance; it can result in an
F-score that is not between precision and recall.
``'samples'``:
Calculate metrics for each instance, and find their average (only
meaningful for multilabel classification where this differs from
:func:`accuracy_score`).
sample_weight : ndarray, shape (n_samples, )
Sample weights.
    correction : float, optional (default=0.0)
        Substitutes the sensitivity of unrecognized classes (which would
        otherwise be zero) with this value.
Returns
-------
geometric_mean : float
Examples
--------
>>> from imblearn.metrics import geometric_mean_score
>>> y_true = [0, 1, 2, 0, 1, 2]
>>> y_pred = [0, 2, 1, 0, 0, 1]
>>> geometric_mean_score(y_true, y_pred)
0.0
>>> geometric_mean_score(y_true, y_pred, correction=0.001)
0.010000000000000004
>>> geometric_mean_score(y_true, y_pred, average='macro')
0.47140452079103168
>>> geometric_mean_score(y_true, y_pred, average='micro')
0.47140452079103168
>>> geometric_mean_score(y_true, y_pred, average='weighted')
0.47140452079103168
>>> geometric_mean_score(y_true, y_pred, average=None)
array([ 0.8660254, 0. , 0. ])
References
----------
.. [1] Kubat, M. and Matwin, S. "Addressing the curse of
imbalanced training sets: one-sided selection" ICML (1997)
.. [2] Barandela, R., Sánchez, J. S., Garcıa, V., & Rangel, E. "Strategies
for learning in class imbalance problems", Pattern Recognition,
36(3), (2003), pp 849-851.
"""
if average is None or average != 'multiclass':
sen, spe, _ = sensitivity_specificity_support(
y_true,
y_pred,
labels=labels,
pos_label=pos_label,
average=average,
            warn_for=('sensitivity', 'specificity'),
sample_weight=sample_weight)
        LOGGER.debug('The sensitivity and specificity are : %s - %s',
                     sen, spe)
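        # With a binary or one-vs-rest average, the G-mean reduces to
        # sqrt(sensitivity * specificity) for the selected class(es).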
return np.sqrt(sen * spe)
else:
present_labels = unique_labels(y_true, y_pred)
if labels is None:
labels = present_labels
n_labels = None
else:
n_labels = len(labels)
labels = np.hstack([labels, np.setdiff1d(present_labels, labels,
assume_unique=True)])
le = LabelEncoder()
le.fit(labels)
y_true = le.transform(y_true)
y_pred = le.transform(y_pred)
sorted_labels = le.classes_
# labels are now from 0 to len(labels) - 1 -> use bincount
tp = y_true == y_pred
tp_bins = y_true[tp]
if sample_weight is not None:
tp_bins_weights = np.asarray(sample_weight)[tp]
else:
tp_bins_weights = None
if len(tp_bins):
tp_sum = np.bincount(tp_bins, weights=tp_bins_weights,
minlength=len(labels))
else:
# Pathological case
true_sum = tp_sum = np.zeros(len(labels))
if len(y_true):
true_sum = np.bincount(y_true, weights=sample_weight,
minlength=len(labels))
# Retain only selected labels
indices = np.searchsorted(sorted_labels, labels[:n_labels])
tp_sum = tp_sum[indices]
true_sum = true_sum[indices]
        recall = _prf_divide(tp_sum, true_sum, "recall", "true", None,
                             ("recall",))
recall[recall == 0] = correction
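        # The multiclass G-mean is the geometric mean of the per-class
        # sensitivities (recalls); substituting ``correction`` for zero
        # recalls keeps a single unrecognized class from collapsing the
        # whole score to zero.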
return sp.stats.mstats.gmean(recall)
def make_index_balanced_accuracy(alpha=0.1, squared=True):
"""Balance any scoring function using the index balanced accuracy
    This factory function wraps a scoring function to express it as the
    index balanced accuracy (IBA). Use this function to decorate any
    supported scoring function.
Only metrics requiring ``y_pred`` can be corrected with the index
balanced accuracy. ``y_score`` cannot be used since the dominance
cannot be computed.
Parameters
----------
alpha : float, optional (default=0.1)
Weighting factor.
squared : bool, optional (default=True)
        If ``squared`` is True, the metric is squared before being weighted.
Returns
-------
    iba_scoring_func : callable
        The decorated scoring function, which will automatically compute the
        index balanced accuracy.
Examples
--------
>>> from imblearn.metrics import geometric_mean_score as gmean
>>> from imblearn.metrics import make_index_balanced_accuracy as iba
>>> gmean = iba(alpha=0.1, squared=True)(gmean)
>>> y_true = [1, 0, 0, 1, 0, 1]
>>> y_pred = [0, 0, 1, 1, 0, 1]
>>> print(gmean(y_true, y_pred, average=None))
[ 0.44444444 0.44444444]
"""
def decorate(scoring_func):
@functools.wraps(scoring_func)
def compute_score(*args, **kwargs):
# Create the list of tags
tags_scoring_func = getcallargs(scoring_func, *args, **kwargs)
            # check that the scoring function needs only predictions,
            # not scores or probabilities
if ('y_score' in tags_scoring_func or
'y_prob' in tags_scoring_func or
'y2' in tags_scoring_func):
                raise AttributeError('The function {} has an unsupported'
                                     ' attribute. Only metrics computed from'
                                     ' `y_pred` are supported.'.format(
                                         scoring_func.__name__))
# Compute the score from the scoring function
_score = scoring_func(*args, **kwargs)
# Square if desired
if squared:
_score = np.power(_score, 2)
# Get the signature of the sens/spec function
sens_spec_sig = signature(sensitivity_specificity_support)
            # We need to extract from the call arguments only those needed
            # by sensitivity_specificity_support
            params_sens_spec = set(sens_spec_sig.parameters.keys())
# Make the intersection between the parameters
sel_params = params_sens_spec.intersection(
set(tags_scoring_func))
# Create a sub dictionary
tags_scoring_func = dict((k, tags_scoring_func[k])
for k in sel_params)
# Check if the metric is the geometric mean
if scoring_func.__name__ == 'geometric_mean_score':
if 'average' in tags_scoring_func:
if tags_scoring_func['average'] == 'multiclass':
tags_scoring_func['average'] = 'macro'
            # accuracy_score and jaccard_similarity_score take no ``average``
            # argument, so fall back to 'binary' for the sensitivity and
            # specificity computation (multilabel is not supported anyway)
elif (scoring_func.__name__ == 'accuracy_score' or
scoring_func.__name__ == 'jaccard_similarity_score'):
tags_scoring_func['average'] = 'binary'
# Create the list of parameters through signature binding
tags_sens_spec = sens_spec_sig.bind(
**tags_scoring_func)
# Call the sens/spec function
sen, spe, _ = sensitivity_specificity_support(
*tags_sens_spec.args,
**tags_sens_spec.kwargs)
# Compute the dominance
dom = sen - spe
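            # IBA_alpha(M) = (1 + alpha * (sensitivity - specificity)) * M,
            # where M is the (optionally squared) wrapped score.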
return (1. + alpha * dom) * _score
return compute_score
return decorate
def classification_report_imbalanced(y_true,
y_pred,
labels=None,
target_names=None,
sample_weight=None,
digits=2,
alpha=0.1):
"""Build a classification report based on metrics used with imbalanced
dataset
Specific metrics have been proposed to evaluate the classification
performed on imbalanced dataset. This report compiles the
state-of-the-art metrics: precision/recall/specificity, geometric
mean, and index balanced accuracy of the
geometric mean.
Parameters
----------
y_true : ndarray, shape (n_samples, )
Ground truth (correct) target values.
y_pred : ndarray, shape (n_samples, )
Estimated targets as returned by a classifier.
    labels : list, optional
        The set of labels to include in the report. By default, all labels
        in ``y_true`` and ``y_pred`` are used in sorted order.
target_names : list of strings, optional
Optional display names matching the labels (same order).
sample_weight : ndarray, shape (n_samples, )
Sample weights.
digits : int, optional (default=2)
Number of digits for formatting output floating point values
alpha : float, optional (default=0.1)
Weighting factor.
Returns
-------
report : string
Text summary of the precision, recall, specificity, geometric mean,
and index balanced accuracy.
Examples
--------
>>> import numpy as np
>>> from imblearn.metrics import classification_report_imbalanced
>>> y_true = [0, 1, 2, 2, 2]
    >>> y_pred = [0, 0, 2, 2, 1]  # doctest: +NORMALIZE_WHITESPACE
    >>> target_names = ['class 0', 'class 1', \
    'class 2']  # doctest: +NORMALIZE_WHITESPACE
>>> print(classification_report_imbalanced(y_true, y_pred, \
target_names=target_names))
pre rec spe f1 geo iba\
sup
<BLANKLINE>
    class 0 0.50 1.00 0.75 0.67 0.87 0.77\
1
class 1 0.00 0.00 0.75 0.00 0.00 0.00\
1
    class 2 1.00 0.67 1.00 0.80 0.82 0.64\
3
<BLANKLINE>
    avg / total 0.70 0.60 0.90 0.61 0.66 0.54\
5
<BLANKLINE>
"""
if labels is None:
labels = unique_labels(y_true, y_pred)
else:
labels = np.asarray(labels)
last_line_heading = 'avg / total'
if target_names is None:
target_names = ['%s' % l for l in labels]
name_width = max(len(cn) for cn in target_names)
width = max(name_width, len(last_line_heading), digits)
headers = ["pre", "rec", "spe", "f1", "geo", "iba", "sup"]
fmt = '%% %ds' % width # first column: class name
fmt += ' '
fmt += ' '.join(['% 9s' for _ in headers])
fmt += '\n'
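    # Each row is a right-aligned class-name column of ``width`` characters
    # followed by seven 9-character columns (pre, rec, spe, f1, geo, iba,
    # sup).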
headers = [""] + headers
report = fmt % tuple(headers)
report += '\n'
# Compute the different metrics
# Precision/recall/f1
precision, recall, f1, support = precision_recall_fscore_support(
y_true,
y_pred,
labels=labels,
average=None,
sample_weight=sample_weight)
# Specificity
specificity = specificity_score(
y_true,
y_pred,
labels=labels,
average=None,
sample_weight=sample_weight)
# Geometric mean
    geo_mean = geometric_mean_score(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
# Index balanced accuracy
iba_gmean = make_index_balanced_accuracy(
alpha=alpha, squared=True)(geometric_mean_score)
    iba = iba_gmean(
        y_true,
        y_pred,
        labels=labels,
        average=None,
        sample_weight=sample_weight)
for i, label in enumerate(labels):
values = [target_names[i]]
for v in (precision[i], recall[i], specificity[i], f1[i], geo_mean[i],
iba[i]):
values += ["{0:0.{1}f}".format(v, digits)]
values += ["{0}".format(support[i])]
report += fmt % tuple(values)
report += '\n'
# compute averages
values = [last_line_heading]
    for v in (np.average(precision, weights=support),
              np.average(recall, weights=support),
              np.average(specificity, weights=support),
              np.average(f1, weights=support),
              np.average(geo_mean, weights=support),
              np.average(iba, weights=support)):
values += ["{0:0.{1}f}".format(v, digits)]
values += ['{0}'.format(np.sum(support))]
report += fmt % tuple(values)
return report
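

if __name__ == '__main__':
    # Minimal usage sketch (illustrative only, not part of the public API):
    # it reuses the toy labels from the docstring example above to print a
    # report combining precision/recall, specificity, G-mean and IBA. The
    # ``demo_*`` names are introduced here purely for demonstration.
    demo_y_true = [0, 1, 2, 2, 2]
    demo_y_pred = [0, 0, 2, 2, 1]
    print(classification_report_imbalanced(
        demo_y_true, demo_y_pred,
        target_names=['class 0', 'class 1', 'class 2']))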