Quick Start¶

This section gives a quick introduction to using the oracle-guardian-ai package.

Installation¶

Installing the oracle-guardian-ai base package
```
pip install oracle-guardian-ai
```
Installing extra libraries

The all-optional extra installs every optional dependency. Note the single quotes around the package specifier: they keep the shell from interpreting the square brackets.

pip install 'oracle-guardian-ai[all-optional]'

To work with fairness/bias, install the fairness module.

pip install 'oracle-guardian-ai[fairness]'

To work with privacy estimation, install the privacy module.

python3 -m pip install 'oracle-guardian-ai[privacy]'

Measurement with a Fairness Metric¶

Measure the Compliance of a Model with a Fairness Metric¶

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import roc_auc_score
from guardian_ai.fairness.metrics import ModelStatisticalParityScorer

# Load the OpenML "adult" dataset as pandas objects. The version is pinned so
# the column names used below stay stable; the dataset-level example on this
# page requests the same version.
dataset = fetch_openml(name='adult', as_frame=True, version=1)
df, y = dataset.data, dataset.target

# Several of the columns are incorrectly labeled as category type in the original dataset
numeric_columns = ['age', 'capitalgain', 'capitalloss', 'hoursperweek']
for col in df.columns:
    if col in numeric_columns:
        df[col] = df[col].astype(int)

# Encode the income label as 1 ('>50K') / 0 ('<=50K') and hold out 30% for testing.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    y.map({'>50K': 1, '<=50K': 0}).astype(int),
    train_size=0.7,
    random_state=0
)

# One-hot encode every feature, then fit a random forest on the encoded data.
# handle_unknown="ignore" keeps categories unseen during fit from raising at
# prediction time.
sklearn_model = Pipeline(
    steps=[
        ("preprocessor", OneHotEncoder(handle_unknown="ignore")),
        ("classifier", RandomForestClassifier()),
    ]
)
sklearn_model.fit(X_train, y_train)

# Predictive quality: ROC AUC computed from column 1 of predict_proba
# (the probability of the label encoded as 1).
y_proba = sklearn_model.predict_proba(X_test)
score = roc_auc_score(y_test, y_proba[:, 1])
print(f'Score on test data: {score:.2f}')

# Fairness: statistical parity of the model's predictions with respect to 'sex'.
fairness_score = ModelStatisticalParityScorer(protected_attributes='sex')
parity_test = fairness_score(sklearn_model, X_test)
print(f'Statistical parity of the model (lower is better): {parity_test:.2f}')

Measure the Compliance of the True Labels of a Dataset with a Fairness Metric¶

from guardian_ai.fairness.metrics import DatasetStatisticalParityScorer
from guardian_ai.fairness.metrics import dataset_statistical_parity
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Fetch version 1 of the OpenML "adult" dataset as pandas objects.
dataset = fetch_openml(name='adult', as_frame=True, version=1)
df = dataset.data
y = dataset.target

# Several of the columns are incorrectly labeled as category type in the
# original dataset; cast those to integers and leave the rest untouched.
numeric_columns = ['age', 'capitalgain', 'capitalloss', 'hoursperweek']
for col in df.columns:
    if col not in numeric_columns:
        continue
    df[col] = df[col].astype(int)

# Encode the income label as 1 ('>50K') / 0 ('<=50K') and hold out 30% for testing.
y_binary = y.map({'>50K': 1, '<=50K': 0}).astype(int)
X_train, X_test, y_train, y_test = train_test_split(
    df, y_binary, train_size=0.7, random_state=0
)

# Option 1: scorer object configured with the protected attribute.
DSPS = DatasetStatisticalParityScorer(protected_attributes='sex')
parity_test_data = DSPS(X=X_test, y_true=y_test)

# Option 2: plain function taking the labels and the protected-attribute
# columns. This rebinds parity_test_data, so the value printed below comes
# from this call (presumably the same number as option 1 - verify).
subgroups = X_test[['sex']]
parity_test_data = dataset_statistical_parity(y_test, subgroups)
print(f'Statistical parity of the test data (lower is better): {parity_test_data:.2f}')

Bias Mitigation¶

from guardian_ai.fairness.bias_mitigation import ModelBiasMitigator
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder

# Load the OpenML "adult" dataset as pandas objects. The version is pinned so
# the column names used below stay stable; the dataset-level fairness example
# on this page requests the same version.
dataset = fetch_openml(name='adult', as_frame=True, version=1)
df, y = dataset.data, dataset.target

# Several of the columns are incorrectly labeled as category type in the original dataset
numeric_columns = ['age', 'capitalgain', 'capitalloss', 'hoursperweek']
for col in df.columns:
    if col in numeric_columns:
        df[col] = df[col].astype(int)

# First split: 80% train+validation, 20% test.
X_train, X_test, y_train, y_test = train_test_split(
    df, y.map({">50K": 1, "<=50K": 0}).astype(int), train_size=0.8, random_state=12345
)

# Second split: 75% of the remainder for training, 25% for validation,
# i.e. 60% / 20% / 20% of the full dataset for train / validation / test.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, train_size=0.75, random_state=12345
)

# Base model: one-hot encode every feature, then fit a random forest.
sklearn_model = Pipeline(
    steps=[
        ("preprocessor", OneHotEncoder(handle_unknown="ignore")),
        ("classifier", RandomForestClassifier()),
    ]
)
sklearn_model.fit(X_train, y_train)

# Wrap the fitted model in a mitigator configured with a fairness metric and
# an accuracy metric for the protected attribute 'sex'.
bias_mitigated_model = ModelBiasMitigator(
    sklearn_model,
    protected_attribute_names="sex",
    fairness_metric="statistical_parity",
    accuracy_metric="balanced_accuracy",
)

# The mitigator is fitted on the validation split, not on the training split
# used for the base model.
bias_mitigated_model.fit(X_val, y_val)

# The return values below are not stored; assign them to variables (or run
# the lines in a notebook cell) to inspect them.
bias_mitigated_model.predict_proba(X_test)
bias_mitigated_model.predict(X_test)
bias_mitigated_model.tradeoff_summary_
bias_mitigated_model.show_tradeoff(hide_inadmissible=False)

Privacy Estimation¶

import os
from guardian_ai.privacy_estimation.dataset import DataSplit, ClassificationDataset
from guardian_ai.privacy_estimation.model import (
    RandomForestTargetModel,
    GradientBoostingTargetModel,
    LogisticRegressionTargetModel,
    SGDTargetModel,
    MLPTargetModel
)
from guardian_ai.privacy_estimation.attack import AttackType
from guardian_ai.privacy_estimation.attack_runner import AttackRunner
from guardian_ai.privacy_estimation.plot_results import ResultPlot

# --- Input data ---------------------------------------------------------
source_dir = "<local_path_to_data>"  # directory containing the source file
dataset_name = "titanic"             # name used for reporting
source_file = "titanic.csv"          # file inside source_dir
contains_header = True               # whether the file has a header row
target_ix = 0                        # index of the target variable
ignore_ix = None                     # specify if you need to ignore any features

# --- Reproducibility ----------------------------------------------------
data_split_seed = 42                 # seed for data splits

# --- Outputs ------------------------------------------------------------
result_file = "titanic_out.txt"      # file to save results in
graph_dir = "."                      # directory to store graphs

# A target index of -1 is replaced by None, which will automatically pick
# the last index.
target_ix = None if target_ix == -1 else target_ix

# --- Attack evaluation --------------------------------------------------
# Attack metrics we care about
metric_functions = ["precision", "recall", "f1", "accuracy"]
# Whether to print the values of the ROC curve
print_roc_curve = False

# Prepare result file for storing target model and attack metrics.
# The handle is left open on purpose: the attack loop further down writes one
# row per (target model, attack) pair and closes the file when it finishes.
fout = open(result_file, "w")

# Tab-separated header: six fixed columns followed by one "attack_<metric>"
# column per requested metric.
header = (
    "dataset\tnum_rows\ttarget_model\ttrain_f1\ttest_f1\tattack_type"
    + "".join("\tattack_" + metric for metric in metric_functions)
    + "\n"
)
fout.write(header)

# Load the source file into a ClassificationDataset
print("Running Dataset: " + dataset_name)
data_path = os.path.join(source_dir, source_file)
dataset = ClassificationDataset(dataset_name)
dataset.load_data(
    data_path,
    contains_header=contains_header,
    target_ix=target_ix,
    ignore_ix=ignore_ix,
)

# Leading columns of every result row: dataset name and number of rows
result_dataset = "\t".join([dataset_name, str(dataset.get_num_rows())])


# Fraction of the datapoints assigned to each split (the fractions below add
# up to 1.0).
dataset_split_ratios = {
    # training the attack model, included in target model training set
    DataSplit.ATTACK_TRAIN_IN: 0.1,
    # training the attack model, not included in target model training set
    DataSplit.ATTACK_TRAIN_OUT: 0.1,
    # evaluating the attack model, included in target model training set
    DataSplit.ATTACK_TEST_IN: 0.2,
    # evaluating the attack model, not included in target model training set
    DataSplit.ATTACK_TEST_OUT: 0.2,
    # included in target model training set, not used in the attack training
    # or testing
    DataSplit.TARGET_ADDITIONAL_TRAIN: 0.1,
    # tuning the target model
    DataSplit.TARGET_VALID: 0.1,
    # evaluating the target model
    DataSplit.TARGET_TEST: 0.2,
}

# Split the loaded dataset according to the ratios, using the configured seed.
dataset.prepare_target_and_attack_data(data_split_seed, dataset_split_ratios)

# Register target models: each model family appears with its default
# settings, and some also with a larger configuration.
target_models = [
    RandomForestTargetModel(),
    RandomForestTargetModel(n_estimators=1000),
    GradientBoostingTargetModel(),
    GradientBoostingTargetModel(n_estimators=1000),
    LogisticRegressionTargetModel(),
    SGDTargetModel(),
    MLPTargetModel(),
    MLPTargetModel(hidden_layer_sizes=(800,)),
]

# Specify which attacks you would like to run; remove entries to skip them.
attacks = [
    AttackType.LossBasedBlackBoxAttack,
    AttackType.ExpectedLossBasedBlackBoxAttack,
    AttackType.ConfidenceBasedBlackBoxAttack,
    AttackType.ExpectedConfidenceBasedBlackBoxAttack,
    AttackType.MerlinAttack,
    AttackType.CombinedBlackBoxAttack,
    AttackType.CombinedWithMerlinBlackBoxAttack,
    AttackType.MorganAttack,
]

# Setup threshold grids for the threshold based attacks we plan to run.
# The loss-based attack is given negative candidates; the confidence-based
# and Merlin attacks are given the same positive candidates.
negative_thresholds = [
    -0.0001, -0.001, -0.01, -0.05, -0.1, -0.3, -0.5,
    -0.7, -0.9, -1.0, -1.5, -10, -50, -100,
]
positive_thresholds = [0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99, 0.999, 1.0]

# Grids are keyed by attack name. Each attack gets its own list object so
# that no two entries alias the same list.
threshold_grids = {
    AttackType.LossBasedBlackBoxAttack.name: negative_thresholds,
    AttackType.ConfidenceBasedBlackBoxAttack.name: list(positive_thresholds),
    AttackType.MerlinAttack.name: list(positive_thresholds),
}

# Build the AttackRunner from the dataset, the registered target models, the
# selected attacks and the threshold grids.
attack_runner = AttackRunner(dataset, target_models, attacks, threshold_grids)

# Train the target models before any attack is run.
attack_runner.train_target_models()

# Set Cache
# Enable input caching only when at least one of these attacks is selected.
# Each attack type is tested for membership in `attacks` explicitly: a bare
# enum member used as an `or` operand is always truthy and would switch
# caching on unconditionally.
cache_input = any(
    attack_type in attacks
    for attack_type in (
        AttackType.MorganAttack,
        AttackType.CombinedBlackBoxAttack,
        AttackType.CombinedWithMerlinBlackBoxAttack,
    )
)

# Run attacks
# Every (target model, attack) pair produces one row in the result file.
# try/finally guarantees the file is closed even if an attack raises, so the
# rows written so far are not lost.
try:
    for target_model in target_models:
        # Result string recorded for this target model by the attack runner
        result_target = attack_runner.target_model_result_strings.get(
            target_model.get_model_name()
        )

        for attack_type in attacks:
            result_attack = attack_runner.run_attack(
                target_model,
                attack_type,
                metric_functions,
                print_roc_curve=print_roc_curve,
                cache_input=cache_input,
            )
            fout.write(result_dataset + "\t" + result_target + "\t" + result_attack)
        # Flush after each target model so partial results reach the disk
        fout.flush()
finally:
    fout.close()

# Generate a plot from the result file written above. The output goes to
# graph_dir and the results are sorted on the "attack_accuracy" column.
plot_options = {
    "dataset_name": dataset.name,
    "result_filename": result_file,
    "graphs_dir": graph_dir,
    "metric_to_sort_on": "attack_accuracy",
}
ResultPlot.print_best_attack(**plot_options)