Source code for guardian_ai.fairness.metrics.dataset

#!/usr/bin/env python
# -*- coding: utf-8 -*--

# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

"""Evaluating the compliance of a dataset with specific fairness metrics"""
from __future__ import annotations

from typing import TYPE_CHECKING, Callable, List, Optional, Union

from guardian_ai.fairness.utils.lazy_loader import LazyLoader
from guardian_ai.fairness.metrics.utils import (
    DEFAULT_DISTANCE,
    DEFAULT_REDUCTION,
    _check_subgroups,
    _FairnessScorer,
    _get_attr_idx_mappings,
    _get_check_array,
    _get_check_distance,
    _get_check_inputs,
    _get_check_reduction,
    _get_score_group_from_metrics,
    _place_space_before_capital_letters,
    _y_to_aifm_ds,
)
from guardian_ai.utils.exception import GuardianAIValueError

if TYPE_CHECKING:
    import numpy as np
    import pandas as pd
    from aif360.metrics import BinaryLabelDatasetMetric
else:
    pd = LazyLoader("pandas")
    np = LazyLoader("numpy")
    BinaryLabelDatasetMetric = LazyLoader(
        "aif360.metrics", "BinaryLabelDatasetMetric", suppress_import_warnings=True
    )


def _dataset_metric(
    y_true: Union[pd.Series, np.ndarray, List],
    subgroups: pd.DataFrame,
    metric: str,
    distance_measure: Optional[str],
    reduction: Optional[str],
    allow_distance_measure_none: bool,
):
    """
    Shared compute engine behind the pairwise dataset metrics.

    The requested base ``metric`` is evaluated for every pair of subgroups
    derived from ``subgroups``; the per-pair scores are then handed to the
    selected reduction.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels.
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.
    metric : str
        Name of the base metric to be called.
    distance_measure : str or None
        How a subgroup's metric value is compared against another
        subgroup's value. Possible values are:

            * ``'ratio'``: Uses ``(subgroup1_val / subgroup2_val)``.
              Inverted to always be >= 1 if needed.
            * ``'diff'``: Uses ``| subgroup1_val - subgroup2_val |``.
            * ``None``: Do not use any distance measure. Only allowed if
              ``allow_distance_measure_none`` is set to True.
    reduction : str or None
        How the per-pair scores are reduced to a single output.
        Possible values are:

            * ``'max'``: Returns the maximal value among all subgroup metrics.
            * ``'mean'``: Returns the mean over all subgroup metrics.
            * ``None``: Returns a
              ``{subgroup_pair: subgroup_pair_metric, ...}`` dict.
    allow_distance_measure_none : bool
        Whether or not ``distance_measure`` may be ``None``.

    Returns
    -------
    float, dict
        The computed metric value, with format according to ``reduction``.
    """
    labels = _get_check_array(y_true, "y_true")

    # The helper validates the user-facing settings and returns, in order:
    # the reduction callable, the distance object, both attribute index
    # mappings and the list of subgroup pairs to evaluate.
    checked_inputs = _get_check_inputs(
        reduction, distance_measure, subgroups, allow_distance_measure_none
    )
    (
        reduce_scores,
        distance,
        attr_vals_to_idx,
        attr_idx_to_vals,
        subgroup_pairs,
    ) = checked_inputs

    aif_dataset = _y_to_aifm_ds(labels, subgroups, attr_vals_to_idx)

    kept_groups = []
    kept_scores = []
    seen_pairs = set()
    # ``subgroup_pairs`` lists every (unprivileged, privileged) combination,
    # e.g. [([{'sex': 0, 'race': 0}], [{'sex': 0, 'race': 1}]), ...]
    for unprivileged, privileged in subgroup_pairs:
        pair_metrics = BinaryLabelDatasetMetric(aif_dataset, unprivileged, privileged)
        pair_score, pair_repr = _get_score_group_from_metrics(
            pair_metrics,
            distance,
            metric,
            unprivileged,
            privileged,
            attr_idx_to_vals,
        )

        # Skip a pair whose mirrored (b, a) representation was already kept.
        mirrored_repr = (pair_repr[1], pair_repr[0])
        if mirrored_repr in seen_pairs:
            continue

        kept_scores.append(pair_score)
        kept_groups.append(pair_repr)
        seen_pairs.add(pair_repr)

    return reduce_scores(kept_groups, kept_scores)


class _DatasetFairnessScorer(_FairnessScorer):
    """
    Base scorer shared by the pairwise dataset metrics.

    Holds the validated ``distance_measure`` and ``reduction`` settings and,
    when called, derives the subgroups from ``X`` (plus optional
    supplementary features) before delegating to the stored metric function.

    Parameters
    ----------
    protected_attributes: pandas.Series, numpy.ndarray, list, str
        Array of attributes or single attribute that should be treated as
        protected. If an attribute is protected, then all of its unique
        values are considered as subgroups.
    metric : str or Callable
        Name of the base metric to be called.
    distance_measure : str or None
        How a subgroup's metric value is compared against another
        subgroup's value. Possible values are:

            * ``'ratio'``: Uses ``(subgroup1_val / subgroup2_val)``.
              Inverted to always be >= 1 if needed.
            * ``'diff'``: Uses ``| subgroup1_val - subgroup2_val |``.
            * ``None``: Do not use any distance measure. Only allowed if
              ``allow_distance_measure_none`` is set to True.
    reduction : str or None
        How the per-pair scores are reduced to a single output.
        Possible values are:

            * ``'max'``: Returns the maximal value among all subgroup metrics.
            * ``'mean'``: Returns the mean over all subgroup metrics.
            * ``None``: Returns a
              ``{subgroup_pair: subgroup_pair_metric, ...}`` dict.
    allow_distance_measure_none : bool
        Whether or not ``distance_measure`` may be ``None``.
    """

    def __init__(
        self,
        protected_attributes: Union[pd.Series, np.ndarray, List, str],
        metric: Union[str, Callable],
        distance_measure: Optional[str],
        reduction: Optional[str],
        allow_distance_measure_none: bool,
    ):
        super().__init__(protected_attributes, metric)

        # Both settings are stored in their validated form, as returned by
        # the checking helpers, rather than as the raw user input.
        checked_distance = _get_check_distance(
            distance_measure, allow_distance_measure_none
        )
        self.distance_measure = checked_distance
        self.reduction = _get_check_reduction(reduction)

    def __call__(
        self,
        model: Optional[object] = None,
        X: Optional[pd.DataFrame] = None,
        y_true: Optional[Union[pd.Series, np.ndarray, List]] = None,
        supplementary_features: Optional[pd.DataFrame] = None,
    ):
        """
        Evaluate the metric on the instances in ``X``.

        Parameters
        ----------
        model : object or None, default=None
            Object that implements a `predict(X)` function to collect
            categorical predictions. Not used by this method.
        X : pandas.DataFrame or None, default=None
            Array of instances to compute the metric on.
        y_true : pandas.Series, numpy.ndarray, list or None, default=None
            Array of groundtruth labels.
        supplementary_features : pandas.DataFrame, or None, default=None
            Array of supplementary features for each instance. Used in case
            one attribute in ``self.protected_attributes`` is not contained by
            ``X`` (e.g. if the protected attribute is not used by the model).

        Returns
        -------
        float, dict
            The computed metric value, with format according to
            ``self.reduction``.

        Raises
        ------
        GuardianAIValueError
            If ``X`` or ``y_true`` is ``None``, or if a feature is present in
            both ``X`` and ``supplementary_features``.
        """
        # ``model`` is unused and ``X`` / ``y_true`` are required; they all
        # default to None so that the scorer can be invoked as
        # ``scorer(X=X, y_true=y_true)``.
        required_input_missing = X is None or y_true is None
        if required_input_missing:
            raise GuardianAIValueError(
                "Value of None was received for either ``X`` or ``y_true``. "
                "This may be due to calling the metric using only 2 positional "
                "arguments. If this is the case, either call the function by "
                "passing ``None`` as the first argument or use named arguments "
                "for ``X`` and ``y_true``."
            )

        protected_subgroups = self._get_check_subgroups(X, supplementary_features)

        return self.metric(
            y_true, protected_subgroups, self.distance_measure, self.reduction
        )

    @property
    def display_name(self):
        """Human-readable name combining reduction, metric, distance and attributes."""
        metric_name = super().display_name

        name_parts = (
            self.reduction.display_name,
            metric_name,
            self.distance_measure.display_name,
            self._display_name_protected_attributes,
        )

        # Joining and re-splitting collapses any run of whitespace (for
        # example from an empty part) into a single space.
        normalized_name = " ".join(" ".join(name_parts).split())

        return _place_space_before_capital_letters(normalized_name)


class DatasetStatisticalParityScorer(_DatasetFairnessScorer):
    """
    Measures the statistical parity [1] of a dataset.

    Statistical parity (also known as Base Rate or Disparate Impact) for a
    dataset states that a dataset is unbiased if the label is independent of
    the protected attribute.

    For each subgroup, statistical parity is computed as the ratio of positive
    labels in that subgroup, i.e. ``PL / N``, where ``PL`` and ``N`` are the
    number of Positive Labels and total number of instances, respectively.

    Perfect score
        A perfect score for this metric means that the dataset does not have
        a different ratio of positive labels for a subgroup than it does for
        the rest of the subgroups. For example, if the protected attributes
        are race and sex, then a perfect statistical parity would mean that
        all combinations of values for race and sex have identical ratios of
        positive labels. Perfect values are:

        - 1 if using ``'ratio'`` as ``distance_measure``.
        - 0 if using ``'diff'`` as ``distance_measure``.

    Parameters
    ----------
    protected_attributes: pandas.Series, numpy.ndarray, list, str
        Array of attributes or single attribute that should be treated as
        protected. If an attribute is protected, then all of its unique
        values are considered as subgroups.
    distance_measure : str, default='diff'
        Determines the distance used to compare a subgroup's metric against
        the rest of the subgroups. Possible values are:

            * ``'ratio'``: Uses ``(subgroup1_val / subgroup2_val)``.
              Inverted to always be >= 1 if needed.
            * ``'diff'``: Uses ``| subgroup1_val - subgroup2_val |``.
    reduction : str or None, default='mean'
        Determines how to reduce scores on all subgroups to a single output.
        Possible values are:

            * ``'max'``: Returns the maximal value among all subgroup metrics.
            * ``'mean'``: Returns the mean over all subgroup metrics.
            * ``None``: Returns a
              ``{subgroup_pair: subgroup_pair_metric, ...}`` dict.

    References
    ----------
    [1] `Cynthia Dwork et al. "Fairness Through Awareness". Innovations in
    Theoretical Computer Science. 2012. <https://arxiv.org/abs/1104.3913>`_

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import DatasetStatisticalParityScorer
        scorer = DatasetStatisticalParityScorer(['race', 'sex'])
        scorer(X=X, y_true=y_true)
        scorer(None, X, y_true)
    """

    def __init__(
        self,
        protected_attributes: Union[pd.Series, np.ndarray, List, str],
        distance_measure: str = DEFAULT_DISTANCE,
        reduction: Optional[str] = DEFAULT_REDUCTION,
    ):
        # Statistical parity always needs a distance measure, so ``None`` is
        # rejected for ``distance_measure``.
        super().__init__(
            protected_attributes=protected_attributes,
            metric=dataset_statistical_parity,
            distance_measure=distance_measure,
            reduction=reduction,
            allow_distance_measure_none=False,
        )
def dataset_statistical_parity(
    y_true: Union[pd.Series, np.ndarray, List],
    subgroups: pd.DataFrame,
    distance_measure: str = DEFAULT_DISTANCE,
    reduction: Optional[str] = DEFAULT_REDUCTION,
):
    """
    Measures the statistical parity of a dataset.

    For more details, refer to :class:`.DatasetStatisticalParityScorer`.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.
    distance_measure : str, default='diff'
        Determines the distance used to compare a subgroup's metric against
        the rest of the subgroups. Possible values are:

            * ``'ratio'``: Uses ``(subgroup1_val / subgroup2_val)``.
              Inverted to always be >= 1 if needed.
            * ``'diff'``: Uses ``| subgroup1_val - subgroup2_val |``.
    reduction : str or None, default='mean'
        Determines how to reduce scores on all subgroups to a single output.
        Possible values are:

            * ``'max'``: Returns the maximal value among all subgroup metrics.
            * ``'mean'``: Returns the mean over all subgroup metrics.
            * ``None``: Returns a
              ``{subgroup_pair: subgroup_pair_metric, ...}`` dict.

    Returns
    -------
    float, dict
        The computed metric value, with format according to ``reduction``.

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import dataset_statistical_parity
        subgroups = X[['race', 'sex']]
        dataset_statistical_parity(y_true, subgroups)
    """
    # Statistical parity compares the "base_rate" metric between subgroup
    # pairs; a distance measure is mandatory for this metric.
    return _dataset_metric(
        y_true,
        subgroups,
        metric="base_rate",
        distance_measure=distance_measure,
        reduction=reduction,
        allow_distance_measure_none=False,
    )
def _simple_dataset_metric(
    y_true: Union[pd.Series, np.ndarray, List], subgroups: pd.DataFrame, metric: str
):
    """
    Compute engine for dataset metrics that do not require a distance measure
    or reduction function because they already return a single value.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.
    metric : str
        Name of the base metric to be called. It is looked up as a method on
        the ``BinaryLabelDatasetMetric`` object and called without arguments.

    Returns
    -------
    float or array-like
        The computed metric value, exactly as returned by the underlying
        metric method (``consistency`` in this module reads index ``[0]``
        of the result because that metric returns an array of size 1).
    """
    y_true = _get_check_array(y_true, "y_true")
    _check_subgroups(subgroups)

    # Only the value -> index mapping is needed to build the dataset.
    attr_vals_to_idx, _ = _get_attr_idx_mappings(subgroups)

    ds_true = _y_to_aifm_ds(y_true, subgroups, attr_vals_to_idx)

    # No privileged/unprivileged groups are passed: these metrics are
    # computed over the dataset as a whole.
    metrics_obj = BinaryLabelDatasetMetric(ds_true)

    return getattr(metrics_obj, metric)()


class _SimpleDatasetFairnessScorer(_FairnessScorer):
    """
    Common base object for dataset metrics without distance or reduction.

    When called, subgroups are derived from ``X`` (plus optional
    supplementary features) and the stored metric function is invoked as
    ``self.metric(y_true, subgroups)``.
    """

    def __call__(
        self,
        model: Optional[object] = None,
        X: Optional[pd.DataFrame] = None,
        y_true: Optional[Union[pd.Series, np.ndarray, List]] = None,
        supplementary_features: Optional[pd.DataFrame] = None,
    ):
        """
        Compute the metric on a given array of instances ``X``.

        Parameters
        ----------
        model : object or None, default=None
            Not used by this method.
        X : pandas.DataFrame or None, default=None
            Array of instances to compute the metric on.
        y_true : pandas.Series, numpy.ndarray, list or None, default=None
            Array of groundtruth labels.
        supplementary_features : pandas.DataFrame, or None, default=None
            Array of supplementary features for each instance, forwarded to
            ``self._get_check_subgroups`` together with ``X``.

        Returns
        -------
        The value returned by ``self.metric(y_true, subgroups)``.

        Raises
        ------
        GuardianAIValueError
            If ``X`` or ``y_true`` is ``None``.
        """
        # We use default values of None for the unused `model` and required
        # ``X`` and `y_true` arguments. This way scorers can be called with
        # `scorer(X=X, y_true=y_true)`.
        if X is None or y_true is None:
            raise GuardianAIValueError(
                "Value of None was received for either ``X`` or `y_true`. "
                "This may be due to calling the metric using only 2 positional "
                "arguments. If this is the case, either call the function by "
                "passing ``None`` as the first argument or use named arguments "
                "for ``X`` and `y_true`."
            )

        subgroups = self._get_check_subgroups(X, supplementary_features)

        return self.metric(y_true, subgroups)
class ConsistencyScorer(_SimpleDatasetFairnessScorer):
    """
    Measures the consistency of a dataset.

    Consistency is measured as the ratio of instances that have a different
    label from the k=5 nearest neighbors.

    Perfect score
        A perfect score for this metric is 0, meaning that the dataset does
        not have different labels for instances that are similar to one
        another.

    Parameters
    ----------
    protected_attributes: pandas.Series, numpy.ndarray, list, str
        Array of attributes or single attribute that should be treated as
        protected. If an attribute is protected, then all of its unique
        values are considered as subgroups.

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import ConsistencyScorer
        scorer = ConsistencyScorer(['race', 'sex'])
        scorer(X=X, y_true=y_true)
        scorer(None, X, y_true)
    """

    # NOTE(review): the value is produced by the ``consistency`` function of
    # this module without any transformation. AIF360 documents its
    # ``consistency`` metric as ``1 - mean(|y_i - mean(neighbor labels)|)``,
    # which would make 1 (not 0) the best value -- TODO confirm the
    # "perfect score" statement in the docstring above.

    def __init__(self, protected_attributes: Union[pd.Series, np.ndarray, List, str]):
        super().__init__(protected_attributes=protected_attributes, metric=consistency)
def consistency(y_true: Union[pd.Series, np.ndarray, List], subgroups: pd.DataFrame):
    """
    Measures the consistency of a dataset.

    For more details, refer to :class:`.ConsistencyScorer`.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.

    Returns
    -------
    The first (and only) element of the array returned by the underlying
    ``"consistency"`` metric.

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import consistency
        subgroups = X[['race', 'sex']]
        consistency(y_true, subgroups)
    """
    # Need to read with [0] because consistency returns an array of size 1.
    return _simple_dataset_metric(y_true, subgroups, metric="consistency")[0]
class SmoothedEDFScorer(_SimpleDatasetFairnessScorer):
    """
    Measures the smoothed Empirical Differential Fairness (EDF) of a dataset,
    as proposed by Foulds et al. [1].

    Smoothed EDF returns the minimal exponential deviation of positive target
    ratios comparing a subgroup to the rest of the subgroups.

    This metric is related to :class:`.DatasetStatisticalParityScorer` with
    `reduction='max'` and `distance_measure='ratio'`, with the only difference
    being that :class:`.SmoothedEDFScorer` returns a logarithmic value instead.

    Perfect score
        A perfect score for this metric is 0, meaning that the dataset does
        not have a different ratio of positive labels for a subgroup than
        it does for the rest of the subgroups. For example, if the protected
        attributes are race and sex, then a perfect smoothed EDF would mean
        that all combinations of values for race and sex have identical
        ratios of positive labels.

    Parameters
    ----------
    protected_attributes: pandas.Series, numpy.ndarray, list, str
        Array of attributes or single attribute that should be treated as
        protected. If an attribute is protected, then all of its unique
        values are considered as subgroups.

    References
    ----------
    [1] `Foulds, James R., et al. "An intersectional definition of fairness."
    2020 IEEE 36th International Conference on Data Engineering (ICDE).
    IEEE, 2020. <https://arxiv.org/abs/1807.08362>`_

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import SmoothedEDFScorer
        scorer = SmoothedEDFScorer(['race', 'sex'])
        scorer(X=X, y_true=y_true)
        scorer(None, X, y_true)
    """

    def __init__(self, protected_attributes: Union[pd.Series, np.ndarray, List, str]):
        super().__init__(protected_attributes=protected_attributes, metric=smoothed_edf)
def smoothed_edf(y_true: Union[pd.Series, np.ndarray, List], subgroups: pd.DataFrame):
    """
    Measures the smoothed Empirical Differential Fairness (EDF) of a dataset,
    as proposed by Foulds et al. [1].

    For more details, refer to :class:`.SmoothedEDFScorer`.

    Parameters
    ----------
    y_true : pandas.Series, numpy.ndarray, list
        Array of groundtruth labels
    subgroups : pandas.DataFrame
        Dataframe containing protected attributes for each instance.

    Returns
    -------
    The value returned by the underlying
    ``"smoothed_empirical_differential_fairness"`` metric.

    References
    ----------
    [1] `Foulds, James R., et al. "An intersectional definition of fairness."
    2020 IEEE 36th International Conference on Data Engineering (ICDE).
    IEEE, 2020. <https://arxiv.org/abs/1807.08362>`_

    Examples
    --------
    .. code-block:: python

        from guardian_ai.fairness.metrics import smoothed_edf
        subgroups = X[['race', 'sex']]
        smoothed_edf(y_true, subgroups)
    """
    return _simple_dataset_metric(
        y_true, subgroups, metric="smoothed_empirical_differential_fairness"
    )