#!/usr/bin/env python
# -*- coding: utf-8 -*--
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""Fairness metrics for evaluating a model"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Union
from guardian_ai.fairness.utils.lazy_loader import LazyLoader
from guardian_ai.fairness.metrics.utils import (
DEFAULT_DISTANCE,
DEFAULT_REDUCTION,
_DistanceMetric,
_FairnessScorer,
_get_check_arrays,
_get_check_distance,
_get_check_inputs,
_get_check_reduction,
_get_check_reduction_distance_subgroups,
_get_score_group_from_metrics,
_place_space_before_capital_letters,
_y_to_aifm_ds,
_get_rate_scorer,
_inhouse_metrics,
_aif360_to_automl_metric_names,
)
from guardian_ai.utils.exception import GuardianAIValueError
if TYPE_CHECKING:
import numpy as np
import pandas as pd
from aif360.metrics import ClassificationMetric
else:
np = LazyLoader("numpy")
pd = LazyLoader("pandas")
ClassificationMetric = LazyLoader(
"aif360.metrics", "ClassificationMetric", suppress_import_warnings=True
)
_valid_regression_metrics = [
"TPR",
"statistical_parity",
"FPR",
"FNR",
"FOR",
"FDR",
"error_rate",
]
def _model_metric(
y_true: Optional[Union[pd.Series, np.ndarray, List]],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
metric: str,
distance_measure: Optional[str],
reduction: Optional[str],
allow_y_true_none: bool,
allow_distance_measure_none: bool,
):
"""
Compute engine for all group pairs model metrics.
This computes a given metric on all group pairs for a specified ``subgroups`` input.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list or None
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
metric : str
Name of the base metric to be called.
distance_measure : str or None
Determines the distance used to compare a subgroup's metric
against the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
Only allowed if `allow_distance_measure_none` is set to True
reduction : str or None
Determines how to reduce scores on all subgroups to
a single output. Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
allow_y_true_none : bool
Whether or not to allow `y_true` to be set to ``None``.
allow_distance_measure_none : bool
Whether or not to allow ``distance_measure`` to be set
to ``None``.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
"""
y_true, y_pred = _get_check_arrays(y_true, y_pred, allow_y_true_none)
if _aif360_to_automl_metric_names[metric] in _inhouse_metrics:
# We can calcualte the simple rate-based metrics 1-2 orders of
# magnitude faster than AIF360 does for some reason
reduction, distance = _get_check_reduction_distance_subgroups(
reduction, distance_measure, subgroups, allow_distance_measure_none
)
rate_scorer = _get_rate_scorer(_aif360_to_automl_metric_names[metric])
rates = {}
for group, _ in subgroups.groupby(list(subgroups.columns)):
mask = (subgroups == group).all(1).to_numpy().squeeze()
group = group if len(group) > 1 else group[0]
rates[group] = rate_scorer(
y_true[mask] if y_true is not None else None,
y_pred[mask],
)
groups = []
scores = []
visited_subgroup_pairs = set()
for group1 in sorted(rates):
for group2 in sorted(rates):
if group1 == group2:
continue
group_repr = (group1, group2)
if (group_repr[1], group_repr[0]) not in visited_subgroup_pairs:
score = distance.from_raw_scores(rates[group1], rates[group2])
scores.append(score)
groups.append(group_repr)
visited_subgroup_pairs.add(group_repr)
else:
# We don't support the more complicated metrics to implement
# so we rely on AIF360 for them.
(
reduction,
distance,
attr_vals_to_idx,
attr_idx_to_vals,
subgroup_divisions,
) = _get_check_inputs(
reduction, distance_measure, subgroups, allow_distance_measure_none
)
ds_pred = _y_to_aifm_ds(y_pred, subgroups, attr_vals_to_idx)
# Certain metrics like statistical disparity don't use ground truth labels.
# AIF360 still needs a labels dataset, so we copy the predicted dataset.
if y_true is None:
ds_true = ds_pred.copy()
else:
ds_true = _y_to_aifm_ds(y_true, subgroups, attr_vals_to_idx)
groups = []
scores = []
visited_subgroup_pairs = set()
# subgroup_divisions is a list of all subgroup pairs,
# e.g. [([{'sex': 0, 'race': 0}], [{'sex': 0, 'race': 1}]), ...]
for unpriv_group, priv_group in subgroup_divisions:
subgroup_metrics = ClassificationMetric(
ds_true, ds_pred, unpriv_group, priv_group
)
score, group_repr = _get_score_group_from_metrics(
subgroup_metrics,
distance,
metric,
unpriv_group,
priv_group,
attr_idx_to_vals,
)
if (group_repr[1], group_repr[0]) not in visited_subgroup_pairs:
scores.append(score)
groups.append(group_repr)
visited_subgroup_pairs.add(group_repr)
return reduction(groups, scores)
class _AllGroupPairsModelFairnessScorer(_FairnessScorer):
"""
Common base object for all group pairs model metrics.
This stores settings to pass on to the ``_all_group_pairs_model_metric`` compute
engine and does subgroups generation from a `protected_attributes` array on
an input array of instances ``X``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
metric : str or Callable
Name of the base metric to be called.
distance_measure : str or None, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
allow_distance_measure_none : bool, default=True
Whether or not to allow ``distance_measure`` to be set to ``None``.
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
metric: Union[str, Callable],
distance_measure: Optional[str] = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
allow_distance_measure_none: bool = True,
):
super().__init__(protected_attributes, metric)
self.distance_measure = _get_check_distance(
distance_measure, allow_distance_measure_none
)
self.reduction = _get_check_reduction(reduction)
def __call__( # type: ignore[override]
self,
model: Any,
X: pd.DataFrame,
y_true: Union[pd.Series, np.ndarray, List],
supplementary_features: Optional[pd.DataFrame] = None,
):
"""
Compute the metric using a model's predictions on a given array
of instances ``X``.
Parameters
----------
model: Any
Object that implements a `predict(X)` function to collect
categorical predictions.
X : pandas.DataFrame
Array of instances to compute the metric on.
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
supplementary_features : pandas.DataFrame or None, default=None
Array of supplementary features for each instance. Used in case
one attribute in ``self.protected_attributes`` is not contained by
``X`` (e.g. if the protected attribute is not used by the model).
Returns
-------
float, dict
The computed metric value, with format according to ``self.reduction``.
Raises
------
GuardianAIValueError
- if a feature is present in both ``X``
and ``supplementary_features``.
"""
y_pred = model.predict(X)
subgroups = self._get_check_subgroups(X, supplementary_features)
return self.metric(
y_true, y_pred, subgroups, self.distance_measure, self.reduction
)
@property
def display_name(self):
base_display_name = super().display_name
fullname = " ".join(
[
self.reduction.display_name,
base_display_name,
self.distance_measure.display_name,
self._display_name_protected_attributes,
]
)
fullname = " ".join(fullname.split())
return _place_space_before_capital_letters(fullname)
[docs]
class ModelStatisticalParityScorer(_AllGroupPairsModelFairnessScorer): # noqa: D412
"""
Measure the statistical parity [1] of a model's output between subgroups
and the rest of the population.
Statistical parity (also known as Base Rate or Disparate Impact) states that
a predictor is unbiased if the prediction is independent of the protected
attribute.
Statistical Parity is calculated as PP / N, where PP and N are the number of
Positive Predictions and total Number of predictions made, respectively.
Perfect score
A perfect score for this metric means that the model does not predict
positively any of the subgroups at a different rate than it does for the
rest of the population. For example, if the protected attributes are race
and sex, then a perfect statistical parity would mean that all combinations
of values for race and sex have identical ratios of positive predictions.
Perfect values are:
- 1 if using ``'ratio'`` as ``distance_measure``.
- 0 if using ``'diff'`` as ``distance_measure``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
References
----------
[1] `Cynthia Dwork et al. "Fairness Through Awareness". Innovations in
Theoretical Computer Science. 2012. <https://arxiv.org/abs/1104.3913>`_
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import ModelStatisticalParityScorer
scorer = ModelStatisticalParityScorer(['race', 'sex'])
scorer(model, X, y_true)
This metric does not require `y_true`. It can also be called using
.. code-block:: python
scorer(model, X)
""" # noqa: D412
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=model_statistical_parity,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=False,
)
[docs]
def __call__(
self,
model: Any,
X: pd.DataFrame,
y_true: Optional[Union[pd.Series, np.ndarray, List]] = None,
supplementary_features: Optional[pd.DataFrame] = None,
):
"""
Compute the metric using a model's predictions on a given array
of instances ``X``.
Parameters
----------
model: Any
Object that implements a `predict(X)` function to collect
categorical predictions.
X : pandas.DataFrame
Array of instances to compute the metric on.
y_true : pandas.Series, numpy.ndarray, list, or None, default=None
Array of groundtruth labels.
supplementary_features : pandas.DataFrame, or None, default=None
Array of supplementary features for each instance. Used in case
one attribute in ``self.protected_attributes`` is not contained by
``X`` (e.g. if the protected attribute is not used by the model).
Returns
-------
float, dict
The computed metric value, with format according to ``self.reduction``.
Raises
------
GuardianAIValueError
- if a feature is present in both ``X``
and ``supplementary_features``.
"""
y_pred = model.predict(X)
subgroups = self._get_check_subgroups(X, supplementary_features)
return self.metric(
y_true, y_pred, subgroups, self.distance_measure, self.reduction
)
# This function has the same signature as other model metrics even though it
# does not need nor use y_true.
# We use default values of None for the unused `y_true` and required `y_pred`
# and `subgroups` arguments. This way this function can be called using
# `model_statistical_parity(y_pred=y_pred, subgroups=subgroups)`.
[docs]
def model_statistical_parity(
y_true: Optional[Union[pd.Series, np.ndarray, List]] = None,
y_pred: Optional[Union[pd.Series, np.ndarray, List]] = None,
subgroups: Optional[pd.DataFrame] = None,
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measure the statistical parity of a model's output between subgroups
and the rest of the population.
For more details, refer to :class:`.ModelStatisticalParityScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list or None, default=None
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list or None, default=None
Array of model predictions.
subgroups : pandas.DataFrame or None, default=None
Dataframe containing protected attributes for each instance.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Raises
------
GuardianAIValueError
If Value of None is received for either `y_pred` or `subgroups`.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import model_statistical_parity
subgroups = X[['race', 'sex']]
model_statistical_parity(y_true, y_pred, subgroups)
This metric does not require `y_true`. It can also be called using
.. code-block:: python
model_statistical_parity(None, y_pred, subgroups)
model_statistical_parity(y_pred=y_pred, subgroups=subgroups)
""" # noqa: D412
if y_pred is None or subgroups is None:
raise GuardianAIValueError(
"Value of None was received for either `y_pred` or `subgroups`. "
"This may be due to calling the metric using only 2 positional "
"arguments. If this is the case, either call the function by "
"passing ``None`` as the first argument or use named arguments for "
"`y_pred` and `subgroups`."
)
return _model_metric(
None,
y_pred,
subgroups,
metric="selection_rate",
distance_measure=distance_measure,
reduction=reduction,
allow_y_true_none=True,
allow_distance_measure_none=False,
)
[docs]
class TruePositiveRateScorer(_AllGroupPairsModelFairnessScorer):
"""
Measures the disparity of a model's true positive rate between subgroups
and the rest of the population (also known as equal opportunity).
For each subgroup, the disparity is measured by comparing the true positive
rate on instances of a subgroup against the rest of the population.
True Positive Rate [1] (also known as TPR, recall, or sensitivity) is
calculated as TP / (TP + FN), where TP and FN are the number of true
positives and false negatives, respectively.
Perfect score
A perfect score for this metric means that the model does not correctly
predict the positive class for any of the subgroups more often than it
does for the rest of the population. For example, if the protected
attributes are race and sex, then a perfect true positive rate disparity
would mean that all combinations of values for race and sex have
identical true positive rates. Perfect values are:
- 1 if using ``'ratio'`` as ``distance_measure``.
- 0 if using ``'diff'`` as ``distance_measure``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
References
----------
[1] `Moritz Hardt et al. "Equality of Opportunity in Supervised Learning".
Advances in Neural Information Processing Systems. 2016.
<https://arxiv.org/pdf/1610.02413.pdf>`_
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import TruePositiveRateScorer
scorer = TruePositiveRateScorer(['race', 'sex'])
scorer(model, X, y_true)
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=true_positive_rate,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=False,
)
[docs]
def true_positive_rate(
y_true: Union[pd.Series, np.ndarray, List],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measures the disparity of a model's true positive rate between subgroups
and the rest of the population.
For more details, refer to :class:`.TruePositiveRateScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import true_positive_rate
subgroups = X[['race', 'sex']]
true_positive_rate(y_true, y_pred, subgroups)
"""
return _model_metric(
y_true,
y_pred,
subgroups,
metric="true_positive_rate",
distance_measure=distance_measure,
reduction=reduction,
allow_y_true_none=False,
allow_distance_measure_none=False,
)
[docs]
class FalsePositiveRateScorer(_AllGroupPairsModelFairnessScorer):
"""
Measures the disparity of a model's false positive rate between subgroups
and the rest of the population.
For each subgroup, the disparity is measured by comparing the false
positive rate on instances of a subgroup against the rest of the population.
False Positive Rate [1] (also known as FPR or fall-out) is calculated as
FP / (FP + TN), where FP and TN are the number of false positives and
true negatives, respectively.
Perfect score
A perfect score for this metric means that the model does not incorrectly
predict the positive class for any of the subgroups more often than it
does for the rest of the population. For example, if the protected
attributes are race and sex, then a perfect false positive rate disparity
would mean that all combinations of values for race and sex have identical
false positive rates. Perfect values are:
- 1 if using ``'ratio'`` as ``distance_measure``.
- 0 if using ``'diff'`` as ``distance_measure``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
References
----------
[1] `Alexandra Chouldechova. "Fair Prediction with Disparate Impact: A Study
of Bias in Recidivism Prediction Instruments". Big Data (2016).
<https://www.liebertpub.com/doi/10.1089/big.2016.0047>`_
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import FalsePositiveRateScorer
scorer = FalsePositiveRateScorer(['race', 'sex'])
scorer(model, X, y_true)
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=false_positive_rate,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=False,
)
[docs]
def false_positive_rate(
y_true: Union[pd.Series, np.ndarray, List],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measures the disparity of a model's false positive rate between subgroups
and the rest of the population.
For more details, refer to :class:`.FalsePositiveRateScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import false_positive_rate
subgroups = X[['race', 'sex']]
false_positive_rate(y_true, y_pred, subgroups)
"""
return _model_metric(
y_true,
y_pred,
subgroups,
metric="false_positive_rate",
distance_measure=distance_measure,
reduction=reduction,
allow_y_true_none=False,
allow_distance_measure_none=False,
)
[docs]
class FalseNegativeRateScorer(_AllGroupPairsModelFairnessScorer):
"""
Measures the disparity of a model's false negative rate between subgroups
and the rest of the population.
For each subgroup, the disparity is measured by comparing the false
negative rate on instances of a subgroup against the rest of the population.
False Negative Rate [1] (also known as FNR or miss rate) is calculated as
FN / (FN + TP), where FN and TP are the number of false negatives and
true positives, respectively.
Perfect score
A perfect score for this metric means that the model does not incorrectly
predict the negative class for any of the subgroups more often than it
does for the rest of the population. For example, if the protected
attributes are race and sex, then a perfect false negative rate disparity
would mean that all combinations of values for race and sex have identical
false negative rates. Perfect values are:
- 1 if using ``'ratio'`` as ``distance_measure``.
- 0 if using ``'diff'`` as ``distance_measure``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
References
----------
[1] `Alexandra Chouldechova. "Fair Prediction with Disparate Impact: A Study
of Bias in Recidivism Prediction Instruments". Big Data (2016).
<https://www.liebertpub.com/doi/10.1089/big.2016.0047>`_
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import FalseNegativeRateScorer
scorer = FalseNegativeRateScorer(['race', 'sex'])
scorer(model, X, y_true)
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=false_negative_rate,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=False,
)
[docs]
def false_negative_rate(
y_true: Union[pd.Series, np.ndarray, List],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measures the disparity of a model's false negative rate between subgroups
and the rest of the population.
For more details, refer to :class:`.FalseNegativeRateScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import false_negative_rate
subgroups = X[['race', 'sex']]
false_negative_rate(y_true, y_pred, subgroups)
"""
return _model_metric(
y_true,
y_pred,
subgroups,
metric="false_negative_rate",
distance_measure=distance_measure,
reduction=reduction,
allow_y_true_none=False,
allow_distance_measure_none=False,
)
[docs]
class FalseOmissionRateScorer(_AllGroupPairsModelFairnessScorer):
"""
Measures the disparity of a model's false omission rate between subgroups
and the rest of the population.
For each subgroup, the disparity is measured by comparing the false
omission rate on instances of a subgroup against the rest of the population.
False Omission Rate (also known as FOR) is calculated as
FN / (FN + TN), where FN and TN are the number of false negatives and
true negatives, respectively.
Perfect score
A perfect score for this metric means that the model does not make more
mistakes on the negative class for any of the subgroups more often than it
does for the rest of the population. For example, if the protected
attributes are race and sex, then a perfect false omission rate disparity
would mean that all combinations of values for race and sex have identical
false omission rates. Perfect values are:
- 1 if using ``'ratio'`` as ``distance_measure``.
- 0 if using ``'diff'`` as ``distance_measure``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import FalseOmissionRateScorer
scorer = FalseOmissionRateScorer(['race', 'sex'])
scorer(model, X, y_true)
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=false_omission_rate,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=False,
)
[docs]
def false_omission_rate(
y_true: Union[pd.Series, np.ndarray, List],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measures the disparity of a model's false omission rate between subgroups
and the rest of the population.
For more details, refer to :class:`.FalseOmissionRateScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import false_omission_rate
subgroups = X[['race', 'sex']]
false_omission_rate(y_true, y_pred, subgroups)
"""
return _model_metric(
y_true,
y_pred,
subgroups,
metric="false_omission_rate",
distance_measure=distance_measure,
reduction=reduction,
allow_y_true_none=False,
allow_distance_measure_none=False,
)
[docs]
class FalseDiscoveryRateScorer(_AllGroupPairsModelFairnessScorer):
"""
Measures the disparity of a model's false discovery rate between subgroups
and the rest of the population.
For each subgroup, the disparity is measured by comparing the false
discovery rate on instances of a subgroup against the rest of the
population.
False Discovery Rate (also known as FDR) is calculated as
FP / (FP + TP), where FP and TP are the number of false positives and
true positives, respectively.
Perfect score
A perfect score for this metric means that the model does not make more
mistakes on the positive class for any of the subgroups more often than it
does for the rest of the population. For example, if the protected
attributes are race and sex, then a perfect false discovery rate disparity
would mean that all combinations of values for race and sex have identical
false discovery rates. Perfect values are:
- 1 if using ``'ratio'`` as ``distance_measure``.
- 0 if using ``'diff'`` as ``distance_measure``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import FalseDiscoveryRateScorer
scorer = FalseDiscoveryRateScorer(['race', 'sex'])
scorer(model, X, y_true)
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=false_discovery_rate,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=False,
)
[docs]
def false_discovery_rate(
y_true: Union[pd.Series, np.ndarray, List],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measures the disparity of a model's false discovery rate between subgroups
and the rest of the population.
For more details, refer to :class:`.FalseDiscoveryRateScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import false_discovery_rate
subgroups = X[['race', 'sex']]
false_discovery_rate(y_true, y_pred, subgroups)
"""
return _model_metric(
y_true,
y_pred,
subgroups,
metric="false_discovery_rate",
distance_measure=distance_measure,
reduction=reduction,
allow_y_true_none=False,
allow_distance_measure_none=False,
)
[docs]
class ErrorRateScorer(_AllGroupPairsModelFairnessScorer):
"""
Measures the disparity of a model's error rate between subgroups
and the rest of the population.
For each subgroup, the disparity is measured by comparing the error rate on
instances of a subgroup against the rest of the population.
Error Rate (also known as inaccuracy) is calculated as
(FP + FN) / N, where FP and FN are the number of false positives and
false negatives, respectively, while N is the total Number of
instances.
Perfect score
A perfect score for this metric means that the model does not make more
mistakes for any of the subgroups more often than it
does for the rest of the population. For example, if the protected
attributes are race and sex, then a perfect error rate disparity would
mean that all combinations of values for race and sex have identical
error rates. Perfect values are:
- 1 if using ``'ratio'`` as ``distance_measure``.
- 0 if using ``'diff'`` as ``distance_measure``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import ErrorRateScorer
scorer = ErrorRateScorer(['race', 'sex'])
scorer(model, X, y_true)
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=error_rate,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=False,
)
[docs]
def error_rate(
y_true: Union[pd.Series, np.ndarray, List],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measures the disparity of a model's error rate between subgroups
and the rest of the population.
For more details, refer to :class:`.ErrorRateScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import error_rate
subgroups = X[['race', 'sex']]
error_rate(y_true, y_pred, subgroups)
"""
return _model_metric(
y_true,
y_pred,
subgroups,
metric="error_rate",
distance_measure=distance_measure,
reduction=reduction,
allow_y_true_none=False,
allow_distance_measure_none=False,
)
[docs]
class EqualizedOddsScorer(_AllGroupPairsModelFairnessScorer):
"""
Measures the disparity of a model's true positive and false positive rates
between subgroups and the rest of the population.
The disparity is measured by comparing the true positive and false positive
rates on instances of a subgroup against the rest of the population.
True Positive Rate (also known as TPR, recall, or sensitivity) is
calculated as TP / (TP + FN), where TP and FN are the number of true
positives and false negatives, respectively.
False Positive Rate (also known as FPR or fall-out) is calculated as
FP / (FP + TN), where FP and TN are the number of false positives and
true negatives, respectively.
Equalized Odds [1] is computed by taking the maximum distance between
TPR and FPR for a subgroup against the rest of the population.
Perfect score
A perfect score for this metric means that the model has the same TPR and
FPR when comparing a subgroup to the rest of the population. For example,
if the protected attributes are race and sex, then a perfect
Equalized Odds disparity would mean that all combinations of values for
race and sex have identical TPR and FPR. Perfect values are:
- 1 if using ``'ratio'`` as ``distance_measure``.
- 0 if using ``'diff'`` as ``distance_measure``.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
References
----------
[1] `Moritz Hardt et al. "Equality of Opportunity in Supervised Learning".
Advances in Neural Information Processing Systems. 2016.
<https://arxiv.org/pdf/1610.02413.pdf>`_
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import EqualizedOddsScorer
scorer = EqualizedOddsScorer(['race', 'sex'])
scorer(model, X, y_true)
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=equalized_odds,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=False,
)
[docs]
def equalized_odds(
y_true: Union[pd.Series, np.ndarray, List],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
distance_measure: str = DEFAULT_DISTANCE,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measures the disparity of a model's true positive and false positive rates
between subgroups and the rest of the population.
For more details, refer to :class:`.EqualizedOddsScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
distance_measure : str, default='diff'
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import equalized_odds
subgroups = X[['race', 'sex']]
equalized_odds(y_true, y_pred, subgroups)
"""
tpr = true_positive_rate(
y_true,
y_pred,
subgroups,
distance_measure=distance_measure,
reduction=reduction,
)
fpr = false_positive_rate(
y_true,
y_pred,
subgroups,
distance_measure=distance_measure,
reduction=reduction,
)
if isinstance(tpr, dict):
eq_odds = {}
for key in tpr:
eq_odds[key] = np.nanmax([tpr[key], fpr[key]])
else:
eq_odds = np.nanmax([tpr, fpr])
return eq_odds
[docs]
class TheilIndexScorer(_AllGroupPairsModelFairnessScorer):
"""
Measures the disparity of a model's predictions according to groundtruth
labels, as proposed by Speicher et al. [1].
Intuitively, the Theil Index can be thought of as a measure of the
divergence between a subgroup's different error distributions (i.e. false
positives and false negatives) against the rest of the population.
Perfect score
The perfect score for this metric is 0, meaning that the model does not
have a different error distribution for any subgroup when compared to the
rest of the population. For example, if the protected attributes are
race and sex, then a perfect Theil Index disparity would mean that all
combinations of values for race and sex have identical error
distributions.
Parameters
----------
protected_attributes: pandas.Series, numpy.ndarray, list, str
Array of attributes or single attribute that should be treated as
protected. If an attribute is protected, then all of its unique
values are considered as subgroups.
distance_measure : str or None, default=None
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
References
----------
[1]: `Speicher, Till, et al. "A unified approach to quantifying algorithmic
unfairness: Measuring individual & group unfairness via inequality
indices." Proceedings of the 24th ACM SIGKDD international conference
on knowledge discovery & data mining. 2018.
<https://arxiv.org/abs/1807.00787>`_
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import TheilIndexScorer
scorer = TheilIndexScorer(['race', 'sex'])
scorer(model, X, y_true)
"""
def __init__(
self,
protected_attributes: Union[pd.Series, np.ndarray, List, str],
distance_measure: Optional[str] = None,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
super().__init__(
protected_attributes=protected_attributes,
metric=theil_index,
distance_measure=distance_measure,
reduction=reduction,
allow_distance_measure_none=True,
)
[docs]
def theil_index(
y_true: Union[pd.Series, np.ndarray, List],
y_pred: Union[pd.Series, np.ndarray, List],
subgroups: pd.DataFrame,
distance_measure: Optional[str] = None,
reduction: Optional[str] = DEFAULT_REDUCTION,
):
"""
Measures the disparity of a model's predictions according to groundtruth
labels, as proposed by Speicher et al. [1].
For more details, refer to :class:`.TheilIndexScorer`.
Parameters
----------
y_true : pandas.Series, numpy.ndarray, list
Array of groundtruth labels.
y_pred : pandas.Series, numpy.ndarray, list
Array of model predictions.
subgroups : pandas.DataFrame
Dataframe containing protected attributes for each instance.
distance_measure : str or None, default=None
Determines the distance used to compare a subgroup's metric against
the rest of the population. Possible values are:
* ``'ratio'``: Uses ``(subgroup_val / rest_of_pop_val)``.
Inverted to always be >= 1 if needed.
* ``'diff'``: Uses ``| subgroup_val - rest_of_pop_val |``.
reduction : str or None, default='mean'
Determines how to reduce scores on all subgroups to a single output.
Possible values are:
* ``'max'``: Returns the maximal value among all subgroup metrics.
* ``'mean'``: Returns the mean over all subgroup metrics.
* ``None``: Returns a ``{subgroup: subgroup_metric, ...}`` dict.
Returns
-------
float, dict
The computed metric value, with format according to `reduction`.
Raises
------
AutoMLxValueError
If distance_measure values are given to Theil Index.
References
----------
[1]: `Speicher, Till, et al. "A unified approach to quantifying algorithmic
unfairness: Measuring individual & group unfairness via inequality
indices." Proceedings of the 24th ACM SIGKDD international conference
on knowledge discovery & data mining. 2018.
<https://arxiv.org/abs/1807.00787>`_
Examples
--------
.. code-block:: python
from guardian_ai.fairness.metrics import theil_index
subgroups = X[['race', 'sex']]
theil_index(y_true, y_pred, subgroups)
"""
if distance_measure is not None and not isinstance(
distance_measure, _DistanceMetric
):
raise GuardianAIValueError(
"Theil Index does not accept distance_measure values. It should"
"always be set to ``None``."
)
return _model_metric(
y_true,
y_pred,
subgroups,
metric="between_group_theil_index",
distance_measure=None,
reduction=reduction,
allow_y_true_none=False,
allow_distance_measure_none=True,
)