#!/usr/bin/env python
# -*- coding: utf-8 -*--
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""Evaluating the compliance of a dataset with specific fairness metrics"""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, List, Optional, Union
from guardian_ai.fairness.metrics.utils import (
DEFAULT_DISTANCE,
DEFAULT_REDUCTION,
_check_subgroups,
_FairnessScorer,
_get_attr_idx_mappings,
_get_check_array,
_get_check_distance,
_get_check_inputs,
_get_check_reduction,
_get_score_group_from_metrics,
_place_space_before_capital_letters,
_y_to_aifm_ds,
)
from guardian_ai.fairness.utils.lazy_loader import LazyLoader
from guardian_ai.utils.exception import GuardianAIValueError
if TYPE_CHECKING:
import numpy as np
import pandas as pd
from aif360.metrics import BinaryLabelDatasetMetric
else:
pd = LazyLoader("pandas")
np = LazyLoader("numpy")
BinaryLabelDatasetMetric = LazyLoader(
"aif360.metrics", "BinaryLabelDatasetMetric", suppress_import_warnings=True
)
def _dataset_metric(
    y_true: Union[pd.Series, np.ndarray, List],
    subgroups: pd.DataFrame,
    metric: str,
    distance_measure: Optional[str],
    reduction: Optional[str],
    allow_distance_measure_none: bool,
):
    """
    Shared compute engine behind the subgroup-pair dataset metrics.

    The base ``metric`` is evaluated once per subgroup pair derived from
    ``subgroups``, and the per-pair scores are handed to the reduction.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels.
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.
    metric : str
        Name of the base metric to be called.
    distance_measure : str or None
        Distance used to compare a subgroup's metric against the rest of
        the subgroups. Possible values are:

        * ``'ratio'``: Uses ``(subgroup1_val / subgroup2_val)``. Inverted to
          always be >= 1 if needed.
        * ``'diff'``: Uses ``| subgroup1_val - subgroup2_val |``.
        * ``None``: Do not use any distance metric. Only allowed if
          ``allow_distance_measure_none`` is set to True.
    reduction : str or None
        How distances over all subgroup pairs are reduced to a single
        output. Possible values are:

        * ``'max'``: Returns the maximal distance among all subgroup pairs.
        * ``'mean'``: Returns the mean over distances between all subgroup
          pairs.
        * ``None``: Returns a ``{subgroup_pair: subgroup_pair_metric, ...}``
          dict.
    allow_distance_measure_none : bool
        Whether or not to allow ``distance_measure`` to be set to ``None``.

    Returns
    -------
    float, dict
        The computed metric value, with format according to ``reduction``.
    """
    labels = _get_check_array(y_true, "y_true")

    (
        reduce_pairs,
        checked_distance,
        vals_to_idx,
        idx_to_vals,
        pair_divisions,
    ) = _get_check_inputs(reduction, distance_measure, subgroups, allow_distance_measure_none)

    dataset = _y_to_aifm_ds(labels, subgroups, vals_to_idx)

    pair_names = []
    pair_scores = []
    seen_pairs = set()

    # pair_divisions lists every (unprivileged, privileged) subgroup pair,
    # e.g. [([{'sex': 0, 'race': 0}], [{'sex': 0, 'race': 1}]), ...]
    for unprivileged, privileged in pair_divisions:
        pair_metrics = BinaryLabelDatasetMetric(dataset, unprivileged, privileged)
        pair_score, pair_name = _get_score_group_from_metrics(
            pair_metrics,
            checked_distance,
            metric,
            unprivileged,
            privileged,
            idx_to_vals,
        )

        mirrored_name = (pair_name[1], pair_name[0])
        if mirrored_name in seen_pairs:
            # The same two subgroups were already recorded in the opposite
            # order, so this pair is not reported a second time.
            continue

        pair_scores.append(pair_score)
        pair_names.append(pair_name)
        seen_pairs.add(pair_name)

    return reduce_pairs(pair_names, pair_scores)
class _DatasetFairnessScorer(_FairnessScorer):
    """
    Common base object for all dataset metrics.

    Holds the settings that are forwarded to the metric callable and, when
    called, derives the subgroups from the configured protected attributes
    and an input array of instances ``X``.

    Parameters
    ----------
    protected_attributes : pandas.Series, numpy.ndarray, list, str
        Array of attributes or single attribute that should be treated as
        protected. If an attribute is protected, then all of its unique
        values are considered as subgroups.
    metric : str or Callable
        Name of the base metric to be called.
    distance_measure : str or None
        Distance used to compare a subgroup's metric against the rest of
        the subgroups. Possible values are:

        * ``'ratio'``: Uses ``(subgroup1_val / subgroup2_val)``. Inverted to
          always be >= 1 if needed.
        * ``'diff'``: Uses ``| subgroup1_val - subgroup2_val |``.
        * ``None``: Do not use any distance metric. Only allowed if
          ``allow_distance_measure_none`` is set to True.
    reduction : str or None
        How distances over all subgroup pairs are reduced to a single
        output. Possible values are:

        * ``'max'``: Returns the maximal distance among all subgroup pairs.
        * ``'mean'``: Returns the mean over distances between all subgroup
          pairs.
        * ``None``: Returns a ``{subgroup_pair: subgroup_pair_metric, ...}``
          dict.
    allow_distance_measure_none : bool
        Whether or not to allow ``distance_measure`` to be set to ``None``.
    """

    def __init__(
        self,
        protected_attributes: Union[pd.Series, np.ndarray, List, str],
        metric: Union[str, Callable],
        distance_measure: Optional[str],
        reduction: Optional[str],
        allow_distance_measure_none: bool,
    ):
        super().__init__(protected_attributes=protected_attributes, metric=metric)

        # Both settings are validated/converted up front and stored in their
        # checked form.
        checked_distance = _get_check_distance(distance_measure, allow_distance_measure_none)
        self.distance_measure = checked_distance
        checked_reduction = _get_check_reduction(reduction)
        self.reduction = checked_reduction

    def __call__(
        self,
        model: Optional[object] = None,
        X: Optional[pd.DataFrame] = None,
        y_true: Optional[Union[pd.Series, np.ndarray, List]] = None,
        supplementary_features: Optional[pd.DataFrame] = None,
    ):
        """
        Compute the metric on a given array of instances ``X``.

        Parameters
        ----------
        model : object or None, default=None
            Unused by dataset metrics; accepted so that the call signature
            matches scorers that take a model as first argument.
        X : pandas.DataFrame or None, default=None
            Array of instances to compute the metric on. Required despite
            the ``None`` default.
        y_true : pandas.Series, numpy.ndarray, list or None, default=None
            Array of groundtruth labels. Required despite the ``None``
            default.
        supplementary_features : pandas.DataFrame, or None, default=None
            Array of supplementary features for each instance. Used in case
            one attribute in ``self.protected_attributes`` is not contained
            by ``X`` (e.g. if the protected attribute is not used by the
            model).

        Returns
        -------
        float, dict
            The computed metric value, with format according to
            ``self.reduction``.

        Raises
        ------
        GuardianAIValueError
            If ``X`` or ``y_true`` is ``None``, or if a feature is present
            in both ``X`` and ``supplementary_features``.
        """
        # ``model`` is unused and ``X``/``y_true`` are required, yet all of
        # them default to None so the scorer can be invoked as
        # ``scorer(X=X, y_true=y_true)``.
        inputs_missing = X is None or y_true is None
        if inputs_missing:
            raise GuardianAIValueError(
                "Value of None was received for either ``X`` or ``y_true``. "
                "This may be due to calling the metric using only 2 positional "
                "arguments. If this is the case, either call the function by "
                "passing ``None`` as the first argument or use named arguments "
                "for ``X`` and ``y_true``."
            )

        checked_subgroups = self._get_check_subgroups(X, supplementary_features)
        return self.metric(
            y_true,
            checked_subgroups,
            self.distance_measure,
            self.reduction,
        )

    @property
    def display_name(self):
        """Human-readable name built from reduction, metric, distance and attributes."""
        metric_name = super().display_name
        name_parts = (
            self.reduction.display_name,
            metric_name,
            self.distance_measure.display_name,
            self._display_name_protected_attributes,
        )
        # Split/re-join collapses repeated whitespace left by empty parts.
        collapsed = " ".join(" ".join(name_parts).split())
        return _place_space_before_capital_letters(collapsed)
[docs]
class DatasetStatisticalParityScorer(_DatasetFairnessScorer):
    """
    Measures the statistical parity [1] of a dataset.

    Statistical parity (also known as Base Rate or Disparate Impact) for a
    dataset states that a dataset is unbiased if the label is independent of
    the protected attribute.

    For each subgroup, statistical parity is computed as the ratio of
    positive labels in that subgroup, i.e. PL / N, where PL and N are the
    number of Positive Labels and total number of instances, respectively.

    Perfect score
        A perfect score for this metric means that the dataset does not have
        a different ratio of positive labels for a subgroup than it does for
        the rest of the subgroups. For example, if the protected attributes
        are race and sex, then a perfect statistical parity would mean that
        all combinations of values for race and sex have identical ratios of
        positive labels. Perfect values are:

        - 1 if using ``'ratio'`` as ``distance_measure``.
        - 0 if using ``'diff'`` as ``distance_measure``.

    Parameters
    ----------
    protected_attributes : pandas.Series, numpy.ndarray, list, str
        Array of attributes or single attribute that should be treated as
        protected. If an attribute is protected, then all of its unique
        values are considered as subgroups.
    distance_measure : str, default='diff'
        Distance used to compare a subgroup's metric against the rest of
        the subgroups. Possible values are:

        * ``'ratio'``: Uses ``(subgroup1_val / subgroup2_val)``. Inverted to
          always be >= 1 if needed.
        * ``'diff'``: Uses ``| subgroup1_val - subgroup2_val |``.
    reduction : str or None, default='mean'
        How distances over all subgroup pairs are reduced to a single
        output. Possible values are:

        * ``'max'``: Returns the maximal distance among all subgroup pairs.
        * ``'mean'``: Returns the mean over distances between all subgroup
          pairs.
        * ``None``: Returns a ``{subgroup_pair: subgroup_pair_metric, ...}``
          dict.

    References
    ----------
    [1] `Cynthia Dwork et al. "Fairness Through Awareness". Innovations in
    Theoretical Computer Science. 2012. <https://arxiv.org/abs/1104.3913>`_

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import DatasetStatisticalParityScorer
        scorer = DatasetStatisticalParityScorer(['race', 'sex'])
        scorer(X=X, y_true=y_true)
        scorer(None, X, y_true)
    """

    def __init__(
        self,
        protected_attributes: Union[pd.Series, np.ndarray, List, str],
        distance_measure: str = DEFAULT_DISTANCE,
        reduction: Optional[str] = DEFAULT_REDUCTION,
    ):
        # This metric always needs a distance measure, so ``None`` is
        # rejected for ``distance_measure``.
        super().__init__(
            protected_attributes,
            dataset_statistical_parity,
            distance_measure=distance_measure,
            reduction=reduction,
            allow_distance_measure_none=False,
        )
[docs]
def dataset_statistical_parity(
    y_true: Union[pd.Series, np.ndarray, List],
    subgroups: pd.DataFrame,
    distance_measure: str = DEFAULT_DISTANCE,
    reduction: Optional[str] = DEFAULT_REDUCTION,
):
    """
    Measures the statistical parity of a dataset.

    For more details, refer to :class:`.DatasetStatisticalParityScorer`.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels.
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.
    distance_measure : str, default='diff'
        Determines the distance used to compare a subgroup's metric against
        the rest of the subgroups. Possible values are:

        * ``'ratio'``: Uses ``(subgroup1_val / subgroup2_val)``. Inverted to
          always be >= 1 if needed.
        * ``'diff'``: Uses ``| subgroup1_val - subgroup2_val |``.
    reduction : str or None, default='mean'
        Determines how to reduce distances on all subgroup pairs to a single
        output. Possible values are:

        * ``'max'``: Returns the maximal distance among all subgroup pairs.
        * ``'mean'``: Returns the mean over distances between all subgroup
          pairs.
        * ``None``: Returns a ``{subgroup_pair: subgroup_pair_metric, ...}``
          dict.

    Returns
    -------
    float, dict
        The computed metric value, with format according to ``reduction``.

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import dataset_statistical_parity
        subgroups = X[['race', 'sex']]
        dataset_statistical_parity(y_true, subgroups)
    """
    # Statistical parity is the per-subgroup base rate compared across
    # subgroup pairs; a distance measure is mandatory for this metric.
    return _dataset_metric(
        y_true,
        subgroups,
        metric="base_rate",
        distance_measure=distance_measure,
        reduction=reduction,
        allow_distance_measure_none=False,
    )
def _simple_dataset_metric(
    y_true: Union[pd.Series, np.ndarray, List], subgroups: pd.DataFrame, metric: str
):
    """
    Compute engine for dataset metrics that need neither a distance measure
    nor a reduction function, because the base metric is evaluated once on
    the whole dataset rather than per subgroup pair.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels.
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.
    metric : str
        Name of the ``BinaryLabelDatasetMetric`` method to be called.

    Returns
    -------
    float
        The computed metric value.
    """
    labels = _get_check_array(y_true, "y_true")
    _check_subgroups(subgroups)

    # Only the value-to-index mapping is needed to build the dataset.
    vals_to_idx, _idx_to_vals = _get_attr_idx_mappings(subgroups)
    dataset = _y_to_aifm_ds(labels, subgroups, vals_to_idx)

    # Look the metric up by name on the metrics object and invoke it
    # without arguments.
    compute_metric = getattr(BinaryLabelDatasetMetric(dataset), metric)
    return compute_metric()
class _SimpleDatasetFairnessScorer(_FairnessScorer):
    """
    Base scorer for dataset metrics whose metric callable only takes the
    labels and the subgroups (no distance measure, no reduction).
    """

    def __call__(
        self,
        model: Optional[object] = None,
        X: Optional[pd.DataFrame] = None,
        y_true: Optional[Union[pd.Series, np.ndarray, List]] = None,
        supplementary_features: Optional[pd.DataFrame] = None,
    ):
        """
        Compute the metric on a given array of instances ``X``.

        Parameters
        ----------
        model : object or None, default=None
            Unused; accepted so that the call signature matches scorers
            that take a model as first argument.
        X : pandas.DataFrame or None, default=None
            Array of instances to compute the metric on. Required despite
            the ``None`` default.
        y_true : pandas.Series, numpy.ndarray, list or None, default=None
            Array of groundtruth labels. Required despite the ``None``
            default.
        supplementary_features : pandas.DataFrame, or None, default=None
            Array of supplementary features for each instance, forwarded
            together with ``X`` to the subgroup extraction.

        Returns
        -------
        object
            Whatever ``self.metric(y_true, subgroups)`` returns.

        Raises
        ------
        GuardianAIValueError
            If ``X`` or ``y_true`` is ``None``.
        """
        # ``model`` is unused and ``X``/``y_true`` are required, yet all of
        # them default to None so the scorer can be invoked as
        # ``scorer(X=X, y_true=y_true)``.
        inputs_missing = X is None or y_true is None
        if inputs_missing:
            raise GuardianAIValueError(
                "Value of None was received for either ``X`` or `y_true`. "
                "This may be due to calling the metric using only 2 positional "
                "arguments. If this is the case, either call the function by "
                "passing ``None`` as the first argument or use named arguments "
                "for ``X`` and `y_true`."
            )

        checked_subgroups = self._get_check_subgroups(X, supplementary_features)
        return self.metric(y_true, checked_subgroups)
[docs]
class ConsistencyScorer(_SimpleDatasetFairnessScorer):
    """
    Measures the consistency of a dataset.

    Consistency is measured as the ratio of instances that have a different
    label from the k=5 nearest neighbors.

    Perfect score
        A perfect score for this metric is 0, meaning that the dataset does
        not have different labels for instances that are similar to one
        another.

        NOTE(review): the value is returned unchanged from the wrapped
        ``BinaryLabelDatasetMetric.consistency``; confirm the orientation of
        the perfect score against that method's documentation.

    Parameters
    ----------
    protected_attributes : pandas.Series, numpy.ndarray, list, str
        Array of attributes or single attribute that should be treated as
        protected. If an attribute is protected, then all of its unique
        values are considered as subgroups.

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import ConsistencyScorer
        scorer = ConsistencyScorer(['race', 'sex'])
        scorer(X=X, y_true=y_true)
        scorer(None, X, y_true)
    """

    def __init__(self, protected_attributes: Union[pd.Series, np.ndarray, List, str]):
        # The metric callable is fixed to the module-level ``consistency``.
        super().__init__(protected_attributes, consistency)
[docs]
def consistency(y_true: Union[pd.Series, np.ndarray, List], subgroups: pd.DataFrame):
    """
    Measures the consistency of a dataset.

    For more details, refer to :class:`.ConsistencyScorer`.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels.
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.

    Returns
    -------
    object
        The first element of the value produced by the underlying
        ``"consistency"`` metric.

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import consistency
        subgroups = X[['race', 'sex']]
        consistency(y_true, subgroups)
    """
    raw_result = _simple_dataset_metric(y_true, subgroups, metric="consistency")
    # The underlying metric returns an array of size 1; unwrap its only entry.
    return raw_result[0]
[docs]
class SmoothedEDFScorer(_SimpleDatasetFairnessScorer):
    """
    Measures the smoothed Empirical Differential Fairness (EDF) of a dataset,
    as proposed by Foulds et al. [1].

    Smoothed EDF returns the minimal exponential deviation of positive target
    ratios comparing a subgroup to the rest of the subgroups.

    This metric is related to :class:`.DatasetStatisticalParityScorer` with
    ``reduction='max'`` and ``distance_measure='ratio'``, with the only
    difference being that :class:`.SmoothedEDFScorer` returns a logarithmic
    value instead.

    Perfect score
        A perfect score for this metric is 0, meaning that the dataset does
        not have a different ratio of positive labels for a subgroup than
        it does for the rest of the subgroups. For example, if the
        protected attributes are race and sex, then a perfect smoothed EDF
        would mean that all combinations of values for race and sex have
        identical ratios of positive labels.

    Parameters
    ----------
    protected_attributes : pandas.Series, numpy.ndarray, list, str
        Array of attributes or single attribute that should be treated as
        protected. If an attribute is protected, then all of its unique
        values are considered as subgroups.

    References
    ----------
    [1] `Foulds, James R., et al. "An intersectional definition of fairness."
    2020 IEEE 36th International Conference on Data Engineering (ICDE).
    IEEE, 2020. <https://arxiv.org/abs/1807.08362>`_

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import SmoothedEDFScorer
        scorer = SmoothedEDFScorer(['race', 'sex'])
        scorer(X=X, y_true=y_true)
        scorer(None, X, y_true)
    """

    def __init__(self, protected_attributes: Union[pd.Series, np.ndarray, List, str]):
        # The metric callable is fixed to the module-level ``smoothed_edf``.
        super().__init__(protected_attributes, smoothed_edf)
[docs]
def smoothed_edf(y_true: Union[pd.Series, np.ndarray, List], subgroups: pd.DataFrame):
    """
    Measures the smoothed Empirical Differential Fairness (EDF) of a dataset,
    as proposed by Foulds et al. [1].

    For more details, refer to :class:`.SmoothedEDFScorer`.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels.
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.

    Returns
    -------
    float
        The value of the underlying
        ``"smoothed_empirical_differential_fairness"`` metric.

    References
    ----------
    [1] `Foulds, James R., et al. "An intersectional definition of fairness."
    2020 IEEE 36th International Conference on Data Engineering (ICDE).
    IEEE, 2020. <https://arxiv.org/abs/1807.08362>`_

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import smoothed_edf
        subgroups = X[['race', 'sex']]
        smoothed_edf(y_true, subgroups)
    """
    edf_metric_name = "smoothed_empirical_differential_fairness"
    return _simple_dataset_metric(y_true, subgroups, metric=edf_metric_name)