Universidad del Valle de Guatemala - UVG
Faculty of Engineering - Computer Science
Course: CC3106 - Responsible AI | Section: 10
Project 1: Identification and Mitigation of Biases in Machine Learning Models
Authors:
# --- Imports and Global Configuration ---
import os
from pathlib import Path

import joblib
import matplotlib
matplotlib.use("Agg")  # headless backend so figures can be rendered and saved in CI
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ucimlrepo import fetch_ucirepo

from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

FIG_DIR = Path("docs/assets/figures")
DL_DIR = Path("docs/assets/downloads")
FIG_DIR.mkdir(parents=True, exist_ok=True)
DL_DIR.mkdir(parents=True, exist_ok=True)

plt.rcParams["figure.figsize"] = (6, 4)
plt.rcParams["figure.dpi"] = 120
# !pip install ucimlrepo
Exploratory Analysis¶
from ucimlrepo import fetch_ucirepo
import pandas as pd
# fetch dataset
adult = fetch_ucirepo(id=2)
# data (as pandas dataframes)
X = adult.data.features
y = adult.data.targets
print(adult.variables)
name | role | type | demographic | description | units | missing_values
---|---|---|---|---|---|---
age | Feature | Integer | Age | N/A | None | no
workclass | Feature | Categorical | Income | Private, Self-emp-not-inc, Self-emp-inc, Feder... | None | yes
fnlwgt | Feature | Integer | None | None | None | no
education | Feature | Categorical | Education Level | Bachelors, Some-college, 11th, HS-grad, Prof-... | None | no
education-num | Feature | Integer | Education Level | None | None | no
marital-status | Feature | Categorical | Other | Married-civ-spouse, Divorced, Never-married, S... | None | no
occupation | Feature | Categorical | Other | Tech-support, Craft-repair, Other-service, Sal... | None | yes
relationship | Feature | Categorical | Other | Wife, Own-child, Husband, Not-in-family, Other... | None | no
race | Feature | Categorical | Race | White, Asian-Pac-Islander, Amer-Indian-Eskimo,... | None | no
sex | Feature | Binary | Sex | Female, Male. | None | no
capital-gain | Feature | Integer | None | None | None | no
capital-loss | Feature | Integer | None | None | None | no
hours-per-week | Feature | Integer | None | None | None | no
native-country | Feature | Categorical | Other | United-States, Cambodia, England, Puerto-Rico,... | None | yes
income | Target | Binary | Income | >50K, <=50K. | None | no
print(X)
print(y)
(output: X is a 48842 × 14 DataFrame with the feature columns age, workclass, fnlwgt, education, education-num, marital-status, occupation, relationship, race, sex, capital-gain, capital-loss, hours-per-week, and native-country; y is a 48842 × 1 DataFrame with the income target. Note that the target labels appear both with and without a trailing period, e.g. '<=50K' and '<=50K.'. The combined table is shown below.)
df = pd.concat([X, y], axis=1)
df
age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | income | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 39 | State-gov | 77516 | Bachelors | 13 | Never-married | Adm-clerical | Not-in-family | White | Male | 2174 | 0 | 40 | United-States | <=50K |
1 | 50 | Self-emp-not-inc | 83311 | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 13 | United-States | <=50K |
2 | 38 | Private | 215646 | HS-grad | 9 | Divorced | Handlers-cleaners | Not-in-family | White | Male | 0 | 0 | 40 | United-States | <=50K |
3 | 53 | Private | 234721 | 11th | 7 | Married-civ-spouse | Handlers-cleaners | Husband | Black | Male | 0 | 0 | 40 | United-States | <=50K |
4 | 28 | Private | 338409 | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Wife | Black | Female | 0 | 0 | 40 | Cuba | <=50K |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
48837 | 39 | Private | 215419 | Bachelors | 13 | Divorced | Prof-specialty | Not-in-family | White | Female | 0 | 0 | 36 | United-States | <=50K. |
48838 | 64 | NaN | 321403 | HS-grad | 9 | Widowed | NaN | Other-relative | Black | Male | 0 | 0 | 40 | United-States | <=50K. |
48839 | 38 | Private | 374983 | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0 | 0 | 50 | United-States | <=50K. |
48840 | 44 | Private | 83891 | Bachelors | 13 | Divorced | Adm-clerical | Own-child | Asian-Pac-Islander | Male | 5455 | 0 | 40 | United-States | <=50K. |
48841 | 35 | Self-emp-inc | 182148 | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 60 | United-States | >50K. |
48842 rows × 15 columns
Exploratory Analysis and Data Cleaning¶
df.info()
df.describe(include="all")
df.isnull().sum()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   age             48842 non-null  int64
 1   workclass       47879 non-null  object
 2   fnlwgt          48842 non-null  int64
 3   education       48842 non-null  object
 4   education-num   48842 non-null  int64
 5   marital-status  48842 non-null  object
 6   occupation      47876 non-null  object
 7   relationship    48842 non-null  object
 8   race            48842 non-null  object
 9   sex             48842 non-null  object
 10  capital-gain    48842 non-null  int64
 11  capital-loss    48842 non-null  int64
 12  hours-per-week  48842 non-null  int64
 13  native-country  48568 non-null  object
 14  income          48842 non-null  object
dtypes: int64(6), object(9)
memory usage: 5.6+ MB

age                 0
workclass         963
fnlwgt              0
education           0
education-num       0
marital-status      0
occupation        966
relationship        0
race                0
sex                 0
capital-gain        0
capital-loss        0
hours-per-week      0
native-country    274
income              0
dtype: int64
# Income distribution before cleaning (the raw labels still mix '<=50K' / '<=50K.' variants)
df['income'].value_counts(normalize=True).plot(kind="bar")
plt.title("Income Distribution (>50K vs <=50K)")
plt.show()
# --- CLEANING THE income VARIABLE ---
# Check original unique values
print("Original unique values in income:")
print(df['income'].unique())
# 1. Convert everything to uppercase to avoid issues with 'K' vs 'k'
# 2. Remove extra leading/trailing whitespace
# 3. Remove '.' characters that appear in some values
df['income'] = df['income'].str.upper().str.strip().str.replace('.', '', regex=False)
# Check unique values after cleaning
print("\nUnique values after cleaning:")
print(df['income'].unique())
# --- VISUALIZATION OF THE DISTRIBUTION ---
plt.figure(figsize=(5, 3))  # create the figure first so the title and labels land on the same plot
df['income'].value_counts(normalize=True).plot(kind="bar")
plt.title("Income Distribution (>50K vs <=50K)")
plt.ylabel("Proportion")
plt.xlabel("Income")
plt.show()
Original unique values in income:
['<=50K' '>50K' '<=50K.' '>50K.']

Unique values after cleaning:
['<=50K' '>50K']
# =========================================================
# CLEANING CATEGORICAL VARIABLES
# =========================================================
import numpy as np
import pandas as pd
# -----------------------------
# 1) Normalize the TARGET variable (income)
# -----------------------------
if 'income' in df.columns:
print("Original unique values in income:", df['income'].unique())
df['income'] = (
df['income']
.astype(str)
.str.upper()
.str.strip()
.str.replace('.', '', regex=False)
)
print("Unique values after cleaning income:", df['income'].unique())
assert set(df['income'].unique()) <= {">50K", "<=50K"}, "Income contains values outside {>50K, <=50K}"
# -----------------------------
# 2) Categorical variables to clean
# -----------------------------
cat_cols = ["workclass", "marital-status", "occupation", "relationship", "native-country"]
# -----------------------------
# 3) Replace "?" with NaN and strip whitespace
# -----------------------------
for col in cat_cols:
print(f"\n[{col}] unique values (before):", df[col].unique()[:15], "...")
    df[col] = (
        df[col]
        .astype(str)
        .str.strip()
        # '?' marks missing data; astype(str) also turns real NaN into the literal string 'nan',
        # so map both back to np.nan
        .replace({"?": np.nan, "nan": np.nan})
    )
print(f"[{col}] unique values (after):", df[col].dropna().unique()[:15], "...")
print(f"[{col}] missing values after:", df[col].isna().sum())
# -----------------------------
# 4) Group infrequent countries into "Other"
# -----------------------------
COUNTRY_MIN_COUNT = 200
if "native-country" in df.columns:
country_counts = df["native-country"].value_counts(dropna=True)
common_countries = country_counts[country_counts >= COUNTRY_MIN_COUNT].index
df["native-country"] = df["native-country"].where(df["native-country"].isna() | df["native-country"].isin(common_countries), "Other")
print("\nSummary of native-country after grouping:")
print(df["native-country"].value_counts(dropna=False).head(15))
print(f"Total 'common' countries: {len(common_countries)}")
print(f"Records labeled as 'Other': {(df['native-country'] == 'Other').sum()}")
# -----------------------------
# 5) Top categories per column
# -----------------------------
print("\n=== Top categories per variable (post-cleaning) ===")
for col in cat_cols:
vc = df[col].value_counts(dropna=False).head(10)
print(f"\n{col}:\n{vc}")
try:
out_path = DL_DIR / "adult_clean.csv"
df.to_csv(out_path, index=False)
print(f"\n✅ Clean dataset saved at: {out_path}")
except Exception as e:
print("\nNote: CSV was not saved because DL_DIR does not exist in this environment. Error:", e)
Original unique values in income: ['<=50K' '>50K']
Unique values after cleaning income: ['<=50K' '>50K']

[workclass] unique values (before): ['State-gov' 'Self-emp-not-inc' 'Private' 'Federal-gov' 'Local-gov' '?' 'Self-emp-inc' 'Without-pay' 'Never-worked' nan] ...
[workclass] unique values (after): ['State-gov' 'Self-emp-not-inc' 'Private' 'Federal-gov' 'Local-gov' 'Self-emp-inc' 'Without-pay' 'Never-worked' 'nan'] ...
[workclass] missing values after: 1836

[marital-status] unique values (before): ['Never-married' 'Married-civ-spouse' 'Divorced' 'Married-spouse-absent' 'Separated' 'Married-AF-spouse' 'Widowed'] ...
[marital-status] unique values (after): ['Never-married' 'Married-civ-spouse' 'Divorced' 'Married-spouse-absent' 'Separated' 'Married-AF-spouse' 'Widowed'] ...
[marital-status] missing values after: 0

[occupation] unique values (before): ['Adm-clerical' 'Exec-managerial' 'Handlers-cleaners' 'Prof-specialty' 'Other-service' 'Sales' 'Craft-repair' 'Transport-moving' 'Farming-fishing' 'Machine-op-inspct' 'Tech-support' '?' 'Protective-serv' 'Armed-Forces' 'Priv-house-serv'] ...
[occupation] unique values (after): ['Adm-clerical' 'Exec-managerial' 'Handlers-cleaners' 'Prof-specialty' 'Other-service' 'Sales' 'Craft-repair' 'Transport-moving' 'Farming-fishing' 'Machine-op-inspct' 'Tech-support' 'Protective-serv' 'Armed-Forces' 'Priv-house-serv' 'nan'] ...
[occupation] missing values after: 1843

[relationship] unique values (before): ['Not-in-family' 'Husband' 'Wife' 'Own-child' 'Unmarried' 'Other-relative'] ...
[relationship] unique values (after): ['Not-in-family' 'Husband' 'Wife' 'Own-child' 'Unmarried' 'Other-relative'] ...
[relationship] missing values after: 0

[native-country] unique values (before): ['United-States' 'Cuba' 'Jamaica' 'India' '?' 'Mexico' 'South' 'Puerto-Rico' 'Honduras' 'England' 'Canada' 'Germany' 'Iran' 'Philippines' 'Italy'] ...
[native-country] unique values (after): ['United-States' 'Cuba' 'Jamaica' 'India' 'Mexico' 'South' 'Puerto-Rico' 'Honduras' 'England' 'Canada' 'Germany' 'Iran' 'Philippines' 'Italy' 'Poland'] ...
[native-country] missing values after: 583

Summary of native-country after grouping:
United-States    43832
Other             2701
Mexico             951
NaN                583
Philippines        295
nan                274
Germany            206
Total 'common' countries: 5
Records labeled as 'Other': 2701

=== Top categories per variable (post-cleaning) ===

workclass:
Private             33906
Self-emp-not-inc     3862
Local-gov            3136
State-gov            1981
NaN                  1836
Self-emp-inc         1695
Federal-gov          1432
nan                   963
Without-pay            21
Never-worked           10

marital-status:
Married-civ-spouse       22379
Never-married            16117
Divorced                  6633
Separated                 1530
Widowed                   1518
Married-spouse-absent      628
Married-AF-spouse           37

occupation:
Prof-specialty       6172
Craft-repair         6112
Exec-managerial      6086
Adm-clerical         5611
Sales                5504
Other-service        4923
Machine-op-inspct    3022
Transport-moving     2355
Handlers-cleaners    2072
NaN                  1843

relationship:
Husband           19716
Not-in-family     12583
Own-child          7581
Unmarried          5125
Wife               2331
Other-relative     1506

native-country:
United-States    43832
Other             2701
Mexico             951
NaN                583
Philippines        295
nan                274
Germany            206
✅ Clean dataset saved at: docs/assets/downloads/adult_clean.csv
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# --- Sex ---
df['sex'].value_counts(normalize=True).plot(kind="bar", ax=axes[0,0])
axes[0,0].set_title("Distribution of Sex")
axes[0,0].set_ylabel("Proportion")
# --- Race ---
df['race'].value_counts(normalize=True).plot(kind="bar", ax=axes[0,1])
axes[0,1].set_title("Distribution of Race")
axes[0,1].set_ylabel("Proportion")
# --- Education ---
df['education'].value_counts().plot(kind="barh", ax=axes[1,0])
axes[1,0].set_title("Distribution of Education")
axes[1,0].set_xlabel("Frequency")
# --- Hours per week ---
df['hours-per-week'].hist(bins=30, ax=axes[1,1])
axes[1,1].set_title("Distribution of Hours Worked per Week")
axes[1,1].set_xlabel("Hours")
axes[1,1].set_ylabel("Frequency")
plt.tight_layout()
plt.show()
# --- LIST OF CATEGORICAL VARIABLES TO REVIEW ---
categorical_vars = ["workclass", "marital-status", "occupation", "relationship", "native-country"]
for col in categorical_vars:
print(f"\n--- {col.upper()} ---")
print("Unique values:")
print(df[col].unique())
print("\nFrequency (top 10):")
print(df[col].value_counts(dropna=False).head(10))
print("-"*50)
fig, axes = plt.subplots(3, 2, figsize=(14, 12))
axes = axes.flatten()
for i, col in enumerate(categorical_vars):
df[col].value_counts(normalize=True).head(10).plot(kind="bar", ax=axes[i])
axes[i].set_title(f"Distribution of {col} (Top 10)")
axes[i].set_ylabel("Proportion")
fig.delaxes(axes[-1])
plt.tight_layout()
plt.show()
(output: for each of workclass, marital-status, occupation, relationship, and native-country, this cell prints the full list of unique values and the top-10 frequency table; the counts are identical to the post-cleaning summary shown above, including the residual NaN / 'nan' entries.)
Identification of Possible Biases¶
🔎 1. Sex
Distribution: ~67% men, 33% women.
Possible bias:
The dataset is imbalanced → the model may learn more from male patterns than female ones.
Previous studies show that the >50K income rate is much higher among men.
Risk: the model may learn the relationship “being male → higher income”.
🔎 2. Race
Distribution: ~85% white, minorities with very low representation (e.g., Amer-Indian-Eskimo <1%).
Possible bias:
Minority groups are underrepresented → the model will have little information about them.
This may lead to less reliable and potentially discriminatory predictions for those groups.
🔎 3. Education
Distribution: strong concentration in HS-grad, Some-college, and Bachelors.
Possible bias:
Lower education levels have very few examples → the model has little signal for them and may produce unreliable predictions.
This reinforces the idea that “more education → higher income”, which, although correlated, should not be used as the only criterion.
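To put numbers behind these observations, the >50K base rate can be computed per group directly from the cleaned dataframe. The following is a small sketch (it assumes df and the cleaned income column from the cells above):

# Sketch: >50K base rate per group, to quantify the representation and outcome gaps described above
for col in ["sex", "race", "education"]:
    base_rate = (
        df.groupby(col)["income"]
          .apply(lambda s: (s == ">50K").mean())
          .sort_values(ascending=False)
    )
    print(f"\nP(income > 50K) by {col}:")
    print(base_rate.round(3))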
Baseline Model Development¶
df
age | workclass | fnlwgt | education | education-num | marital-status | occupation | relationship | race | sex | capital-gain | capital-loss | hours-per-week | native-country | income | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 39 | State-gov | 77516 | Bachelors | 13 | Never-married | Adm-clerical | Not-in-family | White | Male | 2174 | 0 | 40 | United-States | <=50K |
1 | 50 | Self-emp-not-inc | 83311 | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 13 | United-States | <=50K |
2 | 38 | Private | 215646 | HS-grad | 9 | Divorced | Handlers-cleaners | Not-in-family | White | Male | 0 | 0 | 40 | United-States | <=50K |
3 | 53 | Private | 234721 | 11th | 7 | Married-civ-spouse | Handlers-cleaners | Husband | Black | Male | 0 | 0 | 40 | United-States | <=50K |
4 | 28 | Private | 338409 | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Wife | Black | Female | 0 | 0 | 40 | Other | <=50K |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
48837 | 39 | Private | 215419 | Bachelors | 13 | Divorced | Prof-specialty | Not-in-family | White | Female | 0 | 0 | 36 | United-States | <=50K |
48838 | 64 | nan | 321403 | HS-grad | 9 | Widowed | nan | Other-relative | Black | Male | 0 | 0 | 40 | United-States | <=50K |
48839 | 38 | Private | 374983 | Bachelors | 13 | Married-civ-spouse | Prof-specialty | Husband | White | Male | 0 | 0 | 50 | United-States | <=50K |
48840 | 44 | Private | 83891 | Bachelors | 13 | Divorced | Adm-clerical | Own-child | Asian-Pac-Islander | Male | 5455 | 0 | 40 | United-States | <=50K |
48841 | 35 | Self-emp-inc | 182148 | Bachelors | 13 | Married-civ-spouse | Exec-managerial | Husband | White | Male | 0 | 0 | 60 | United-States | >50K |
48842 rows × 15 columns
Evaluation of Bias in the Model¶
X = df.drop("income", axis=1)
y = df["income"]
# Categorical and numerical variables
categorical = X.select_dtypes(include=["object"]).columns
numeric = X.select_dtypes(exclude=["object"]).columns
# --- PREPROCESSING ---
preprocessor = ColumnTransformer(
transformers=[
("num", StandardScaler(), numeric),
("cat", OneHotEncoder(handle_unknown="ignore"), categorical)
]
)
# --- PIPELINE ---
model = Pipeline(steps=[
("preprocessor", preprocessor),
("classifier", LogisticRegression(max_iter=500))
])
# --- SPLIT ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# --- TRAIN ---
model.fit(X_train, y_train)
# --- EVALUATION ---
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
# --- EXPORT ---
joblib.dump(model, "income_model.pkl")
print("✅ Model trained and saved")
              precision    recall  f1-score   support

       <=50K       0.88      0.94      0.91      7414
        >50K       0.74      0.59      0.66      2355

    accuracy                           0.85      9769
   macro avg       0.81      0.76      0.78      9769
weighted avg       0.84      0.85      0.85      9769

✅ Model trained and saved
Accuracy (0.85)
The model shows good overall performance: 85% accuracy on the test set. However, global accuracy can be misleading, as it hides inequalities between groups, which will become evident in the analysis by sex and race.

Precision (>50K = 0.74)
When the model predicts that a person earns >50K, it is correct 74% of the time. This shows that the model is relatively reliable when assigning the positive class, which is important if it were to be used for decision-making.
Recall (>50K = 0.59)
Recall for the >50K class is low, as it correctly identifies only 59% of people who actually earn more than 50K. In other words, the model misses 41% of true positives (false negatives). This means that many individuals who do meet the income criterion are not recognized.
F1-score (>50K = 0.66)
The F1 score combines precision and recall, and in this case it is moderate (0.66). This indicates an acceptable balance between predicting correctly and not leaving too many cases unrecognized, but it still shows that the model struggles to capture all positives.
Class imbalance
It is also observed that the <=50K class has much higher metrics (precision 0.88, recall 0.94, F1 = 0.91) than the >50K class. This is expected, since the Adult dataset is imbalanced (there are far more <=50K cases), but it reinforces that the model tends to favor the majority class.
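To put the 0.85 accuracy in context of this imbalance, it helps to compare it against an always-predict-majority baseline and against balanced accuracy. The following is a minimal sketch (it assumes y_train, y_test, and y_pred from the cells above):

# Sketch: contrast the model's accuracy with a majority-class baseline and with balanced accuracy
from sklearn.metrics import balanced_accuracy_score

majority_label = y_train.value_counts().idxmax()  # '<=50K' in this dataset
majority_acc = (y_test == majority_label).mean()

print("Always-predict-majority accuracy:", round(majority_acc, 3))
print("Model accuracy                  :", round(accuracy_score(y_test, y_pred), 3))
print("Model balanced accuracy         :", round(balanced_accuracy_score(y_test, y_pred), 3))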
POS_LABEL = ">50K"
def metrics_by_group(X, y_true, y_pred, sensitive_attribute):
results = {}
for group in X[sensitive_attribute].dropna().unique():
idx = (X[sensitive_attribute] == group).values
if idx.sum() == 0:
continue
acc = accuracy_score(y_true[idx], y_pred[idx])
prec = precision_score(y_true[idx], y_pred[idx], pos_label=POS_LABEL, zero_division=0)
rec = recall_score(y_true[idx], y_pred[idx], pos_label=POS_LABEL, zero_division=0)
f1 = f1_score(y_true[idx], y_pred[idx], pos_label=POS_LABEL, zero_division=0)
results[group] = {"accuracy": acc, "precision": prec, "recall": rec, "f1": f1}
return pd.DataFrame(results).T.sort_index()
def rates_and_fairness(X, y_true, y_pred, sensitive_attribute):
df_tmp = X[[sensitive_attribute]].copy()
df_tmp["y_true"] = y_true.values
df_tmp["y_pred"] = y_pred
rows = []
for group, dfg in df_tmp.groupby(sensitive_attribute):
# Selection Rate
sr = np.mean(dfg["y_pred"] == POS_LABEL)
# TPR (Recall on positives)
pos_mask = dfg["y_true"] == POS_LABEL
tpr = np.mean(dfg.loc[pos_mask, "y_pred"] == POS_LABEL) if pos_mask.any() else np.nan
# FPR (False Positive Rate)
neg_mask = dfg["y_true"] != POS_LABEL
fpr = np.mean(dfg.loc[neg_mask, "y_pred"] == POS_LABEL) if neg_mask.any() else np.nan
rows.append({"group": group, "selection_rate": sr, "TPR": tpr, "FPR": fpr})
out = pd.DataFrame(rows).set_index("group").sort_index()
# Disparate impact vs group with highest selection_rate
ref_group = out["selection_rate"].idxmax()
ref_val = out.loc[ref_group, "selection_rate"]
out["disparate_impact_vs_max"] = out["selection_rate"] / (ref_val if ref_val > 0 else np.nan)
# Equal Opportunity Difference (range of TPR)
equal_opp_diff = out["TPR"].max() - out["TPR"].min()
return out, ref_group, equal_opp_diff
def plot_rates(df_rates, sensitive_attribute, folder="fairness_plots"):
Path(folder).mkdir(parents=True, exist_ok=True)
for col in ["selection_rate", "TPR", "FPR"]:
if col not in df_rates.columns:
continue
plt.figure(figsize=(6, 4))
df_rates[col].plot(kind="bar", color="pink", edgecolor="black")
plt.title(f"{col} by {sensitive_attribute}")
plt.xlabel(sensitive_attribute)
plt.ylabel(col)
plt.tight_layout()
plt.xticks(rotation=45)
path = f"{folder}/{col}_by_{sensitive_attribute}.png"
plt.savefig(path, dpi=150)
plt.show()
model = joblib.load("income_model.pkl")
y_pred = model.predict(X_test)
# METRICS by gender
attribute = "sex"
metrics_sex = metrics_by_group(X_test, y_test, y_pred, attribute)
print("Metrics by sex:")
display(metrics_sex)
rates_sex, ref_grp_sex, eq_opp_diff_sex = rates_and_fairness(X_test, y_test, y_pred, attribute)
print("\nRates and fairness by sex:")
display(rates_sex)
print(f"Reference group (highest selection_rate): {ref_grp_sex}")
print(f"Equal Opportunity Difference (TPR range): {eq_opp_diff_sex:.4f}")
plot_rates(rates_sex, attribute)
# METRICS by race
attribute = "race"
metrics_race = metrics_by_group(X_test, y_test, y_pred, attribute)
print("\nMetrics by race:")
display(metrics_race)
rates_race, ref_grp_race, eq_opp_diff_race = rates_and_fairness(X_test, y_test, y_pred, attribute)
print("\nRates and fairness by race:")
display(rates_race)
print(f"Reference group (highest selection_rate): {ref_grp_race}")
print(f"Equal Opportunity Difference (TPR range): {eq_opp_diff_race:.4f}")
plot_rates(rates_race, attribute)
Metrics by sex:
accuracy | precision | recall | f1 | |
---|---|---|---|---|
Female | 0.92824 | 0.785408 | 0.501370 | 0.612040 |
Male | 0.81380 | 0.738433 | 0.601508 | 0.662974 |
Rates and fairness by sex:
selection_rate | TPR | FPR | disparate_impact_vs_max | |
---|---|---|---|---|
group | ||||
Female | 0.072069 | 0.501370 | 0.017434 | 0.290589 |
Male | 0.248011 | 0.601508 | 0.093269 | 1.000000 |
Reference group (highest selection_rate): Male
Equal Opportunity Difference (TPR range): 0.1001
Metrics by race:
accuracy | precision | recall | f1 | |
---|---|---|---|---|
Amer-Indian-Eskimo | 0.927083 | 0.600000 | 0.375000 | 0.461538 |
Asian-Pac-Islander | 0.833333 | 0.703704 | 0.662791 | 0.682635 |
Black | 0.904311 | 0.702703 | 0.429752 | 0.533333 |
Other | 0.880597 | 0.500000 | 0.250000 | 0.333333 |
White | 0.845268 | 0.749112 | 0.593809 | 0.662480 |
Rates and fairness by race:
selection_rate | TPR | FPR | disparate_impact_vs_max | |
---|---|---|---|---|
group | ||||
Amer-Indian-Eskimo | 0.052083 | 0.375000 | 0.022727 | 0.204475 |
Asian-Pac-Islander | 0.254717 | 0.662791 | 0.103448 | 1.000000 |
Black | 0.077813 | 0.429752 | 0.026506 | 0.305487 |
Other | 0.059701 | 0.250000 | 0.033898 | 0.234384 |
White | 0.202711 | 0.593809 | 0.068332 | 0.795828 |
Reference group (highest selection_rate): Asian-Pac-Islander
Equal Opportunity Difference (TPR range): 0.4128
📊 Analysis by Sex
Selection Rate by sex
The selection rate shows that the model classifies 24.8% of men as >50K, but only 7.2% of women. This means the model much more frequently considers men to have high income, reinforcing a bias against women.
TPR (True Positive Rate) by sex
The recall (TPR) for men is 0.60, while for women it is 0.50. This implies that the model better recognizes men who actually earn >50K than women in the same situation. Although the difference may seem moderate, it is significant, as it reflects an unequal opportunity of being correctly identified as high income.
FPR (False Positive Rate) by sex
Men have a much higher FPR (0.093) than women (0.017). In other words, the model tends to “overclassify” men as >50K even when they are not, while it is much more restrictive with women. This reinforces the conclusion that the model favors men, granting them more positive classifications, even at the cost of making more mistakes.
📊 Analysis by Race
Selection Rate by race
The group with the highest selection rate is Asian-Pac-Islander (25.4%), followed by White (20.3%). In contrast, groups like Amer-Indian-Eskimo (5.2%) and Other (5.9%) are almost never classified as >50K. This shows a clear imbalance in the chances of being recognized as high income, which reflects structural bias.
TPR (True Positive Rate) by race
The model has very high recall for Asian-Pac-Islander (0.66) and White (0.59), meaning it correctly identifies a good share of those who earn >50K in these groups. However, for Other (0.25) and Amer-Indian-Eskimo (0.38), recall drops to critical levels, implying that these groups have a very low probability of being correctly recognized when they actually earn high incomes. The gap between maximum and minimum (Equal Opportunity Difference of 0.41) reflects a very strong inequality.
FPR (False Positive Rate) by race
The Asian-Pac-Islander (0.10) and White (0.07) groups have much higher false positive rates than Amer-Indian-Eskimo (0.02) and Black (0.03). This means the model more readily grants positive classifications to the former, even when unwarranted, while minority groups not only receive fewer positive predictions but are also judged more strictly. This reveals an asymmetry in treatment across races.
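One common way to summarize these selection-rate gaps is the four-fifths (80%) rule: any group whose selection rate falls below 80% of the most-favored group's rate is flagged as potentially disadvantaged. The following is a short sketch using the rates_sex and rates_race tables computed above (the 0.8 threshold is a screening convention, not a definitive judgment):

# Sketch: flag groups below the four-fifths (80%) rule, using the disparate impact ratios computed earlier
for name, rates in [("sex", rates_sex), ("race", rates_race)]:
    flagged = rates[rates["disparate_impact_vs_max"] < 0.8]
    print(f"\n[{name}] groups below 80% of the highest selection rate:")
    print(flagged[["selection_rate", "disparate_impact_vs_max"]].round(3))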
SHAP¶
# ==== SHAP ====
import shap  # numpy, pandas, and matplotlib.pyplot are already imported in the configuration cell
from scipy import sparse
from sklearn.pipeline import Pipeline
# 1) Preparation
prep = model.named_steps["preprocessor"]
clf = model.named_steps["classifier"]
def feat_names(prep, orig_cols):
names = []
for _, trf, cols in prep.transformers_:
if trf == "drop":
continue
if isinstance(trf, Pipeline):
trf = trf[-1]
if hasattr(trf, "get_feature_names_out"):
names += trf.get_feature_names_out(cols).tolist()
else:
names += list(cols)
return names
Xtr = prep.transform(X_train)
Xte = prep.transform(X_test)
feature_names = feat_names(prep, X_train.columns)
def to_dense(X):
return X.toarray() if sparse.issparse(X) else np.asarray(X)
# Small background
rng = np.random.default_rng(42)
n = Xtr.shape[0]
row_bg = rng.choice(n, size=min(200, n), replace=False)
bg_dense = to_dense(Xtr[row_bg])
Xte_dense = to_dense(Xte)
# Linear explainer with masker
masker = shap.maskers.Independent(bg_dense)
explainer = shap.Explainer(clf, masker, algorithm="linear")
shap_expl = explainer(Xte_dense)
shap_expl.feature_names = feature_names
shap_expl.data = Xte_dense
proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)
y_true = y_test.values
errs = np.where(y_pred != y_true)[0]
i_local = int(errs[0]) if len(errs) else int(np.argmin(np.abs(proba - 0.5)))
# ======= SHAP =======
import textwrap  # numpy, pandas, and matplotlib.pyplot come from the configuration cell
from scipy.special import expit
plt.rcParams.update({"figure.dpi": 170, "font.size": 10})
def wrap_labels(labels, width=22):
return ["\n".join(textwrap.wrap(str(s), width=width)) for s in labels]
def clean_name(s: str):
s = s.replace("native-country_", "country:")
s = s.replace("marital-status_", "marital:")
s = s.replace("education-num", "edu_num")
s = s.replace("capital-gain", "cap_gain")
s = s.replace("capital-loss", "cap_loss")
s = s.replace("hours-per-week", "hrs_week")
return s
# ---------- base data for ordering/selecting ----------
vals = shap_expl.values
base_logit = float(np.mean(shap_expl.base_values)) if shap_expl.base_values is not None else 0.0
base_prob = float(expit(base_logit))
mean_abs = np.abs(vals).mean(axis=0)
mean_signed = vals.mean(axis=0)
order_abs = np.argsort(-mean_abs)
TOPN = 15
idx_top = order_abs[:TOPN]
feat_top_names = [clean_name(feature_names[i]) for i in idx_top]
# Signed bar
df_signed = pd.DataFrame({
"feature": feat_top_names,
"mean_SHAP": mean_signed[idx_top],
"mean_abs_SHAP": mean_abs[idx_top]
}).sort_values("mean_SHAP")
plt.figure(figsize=(10, 6))
colors = df_signed["mean_SHAP"].apply(lambda x: "#1f77b4" if x < 0 else "#d62728")
plt.barh(wrap_labels(df_signed["feature"]), df_signed["mean_SHAP"], color=colors)
plt.axvline(0, color="k", lw=1)
plt.title(f"01. Mean SHAP with sign (Top {TOPN}) – log-odds")
plt.xlabel("Mean SHAP (negative ↓ / positive ↑)")
plt.tight_layout()
plt.savefig(FIG_DIR / "01_shap_bar_signed.png", bbox_inches="tight")
plt.close()
# Beeswarm
plt.figure(figsize=(10, 6))
shap.plots.beeswarm(shap_expl[:, idx_top], max_display=TOPN, show=False)
plt.title(f"02. SHAP Beeswarm – distribution per individual (Top {TOPN})")
plt.tight_layout()
plt.savefig(FIG_DIR / "02_shap_beeswarm.png", bbox_inches="tight")
plt.close()
# ===================== 03. Decision plot (trajectories) =====================
# Most informative observations by total impact
impact = np.abs(vals).sum(axis=1)
row_idx = np.argsort(-impact)[:min(400, vals.shape[0])]
top12 = idx_top[:12]
plt.figure(figsize=(11, 5))
shap.decision_plot(
base_logit,
vals[row_idx][:, top12],
feature_names=wrap_labels([clean_name(feature_names[i]) for i in top12], 18),
ignore_warnings=True,
show=False
)
plt.title("03. Decision plot – accumulation of contributions (subsample)")
plt.tight_layout()
plt.savefig(FIG_DIR / "03_shap_decision_plot.png", bbox_inches="tight")
plt.close()
# ============== 04. Local explanation (waterfall) ==========
x_local_raw = X_test.iloc[i_local]
x_local_tr = Xte_dense[i_local]
fx_logit = base_logit + float(vals[i_local].sum())
fx_prob = float(expit(fx_logit))
pred_prob = float(proba[i_local])
pred_label = y_pred[i_local]
true_label = y_test.iloc[i_local]
# Waterfall
plt.figure(figsize=(10, 5))
shap.plots.waterfall(shap_expl[i_local], show=False)
plt.title(f"04. Waterfall – record idx={i_local} (pred={pred_label}, true={true_label})")
plt.tight_layout()
plt.savefig(FIG_DIR / "04_shap_waterfall_local.png", bbox_inches="tight")
plt.close()
# Table of top contributions for the record
K = 10
order_local = np.argsort(-np.abs(vals[i_local]))[:K]
df_local = pd.DataFrame({
"feature": [feature_names[j] for j in order_local],
"transformed_value": [float(x_local_tr[j]) for j in order_local],
"shap": [float(vals[i_local, j]) for j in order_local],
"direction": ["↑" if vals[i_local, j] > 0 else "↓" for j in order_local]
})
df_local.to_csv(DL_DIR / "local_top_contributions.csv", index=False)
with open(DL_DIR / "local_summary.txt", "w", encoding="utf-8") as f:
f.write(
"Explained local record\n"
f"- index in X_test: {i_local}\n"
f"- true label: {true_label}\n"
f"- prediction: {pred_label} | prob={pred_prob:.3%}\n"
f"- base_value (log-odds): {base_logit:.3f} | base prob={base_prob:.3%}\n"
f"- f(x) (log-odds): {fx_logit:.3f} | prob(sigmoid): {fx_prob:.3%}\n"
f"- Top-{K} contributions -> see CSV: local_top_contributions.csv\n"
)
# ===================== 05. Marginal effect (dependence) =====================
for f in ("age", "hours-per-week", "education-num"):
if f in feature_names:
j = feature_names.index(f)
plt.figure(figsize=(8, 5))
shap.plots.scatter(shap_expl[:, j], show=False)
plt.title(f"05. Dependence – {f}")
plt.xlabel(f"Transformed value of {f}")
plt.ylabel("SHAP (log-odds)")
plt.tight_layout()
plt.savefig(FIG_DIR / f"05_dep_{f}.png", bbox_inches="tight")
plt.close()
# ==================== 06-07. Fairness (by sex and by race) =====================
def abs_by_group(expl, feat_names, groups):
df = pd.DataFrame(np.abs(expl.values), columns=feat_names)
df["__g__"] = groups.values
return df.groupby("__g__")[feat_names].mean().T
def margin_by_group(expl, groups):
margins = expl.values.sum(axis=1)
return pd.DataFrame({"g": groups.values, "margin": margins}).groupby("g")["margin"].agg(["mean","median","count"])
for s in ("sex", "race"):
if s not in X_test.columns:
continue
g = X_test[s]
tab_abs = abs_by_group(shap_expl, feature_names, g)
tab_abs.to_csv(DL_DIR / f"{s}_abs_by_feature.csv")
# Top-15 by mean across groups
mean_across = tab_abs.mean(axis=1).sort_values(ascending=False).head(15)
plt.figure(figsize=(10, 6))
plt.barh(wrap_labels([clean_name(i) for i in mean_across.index]), mean_across.values)
plt.title(f"06. Mean |SHAP| by {s} – Top 15 features")
plt.xlabel("Mean |SHAP|")
plt.tight_layout()
plt.savefig(FIG_DIR / f"06_abs_{s}_top15.png", bbox_inches="tight")
plt.close()
tab_m = margin_by_group(shap_expl, g)
tab_m.to_csv(DL_DIR / f"{s}_margin_summary.csv")
plt.figure(figsize=(7, 4))
plt.barh(tab_m.index.astype(str), tab_m["mean"].values)
plt.title(f"07. Mean SHAP margin by {s} (higher ↑ → stronger push to >50K)")
plt.xlabel("Sum of SHAP (log-odds)")
plt.tight_layout()
plt.savefig(FIG_DIR / f"07_margin_{s}.png", bbox_inches="tight")
plt.close()
# ===================== 08-09. Interactive force plots ===========================
DL_DIR.mkdir(parents=True, exist_ok=True)  # already created in the configuration cell; kept as a safeguard
# Local force plot of the same record as waterfall
shap.save_html(
str(DL_DIR / "08_force_local.html"),
shap.plots.force(shap_expl[i_local], feature_names=feature_names, features=Xte_dense[i_local])
)
# Global force plot
take = min(500, vals.shape[0])
idx_rows = np.argsort(-impact)[:take]
shap.save_html(
str(DL_DIR / "09_force_global.html"),
shap.plots.force(shap_expl[idx_rows], feature_names=feature_names, features=Xte_dense[idx_rows])
)
print("✅ Presentation generated.")
print(" Suggested order of figures in PDF:")
print(" 01_shap_bar_signed.png → importance + direction (global)")
print(" 02_shap_beeswarm.png → distribution per individual (global)")
print(" 03_shap_decision_plot.png")
print(" 04_shap_waterfall_local.png + DL/local_summary.txt + local_top_contributions.csv")
print(" 05_dep_*.png")
print(" 06_abs_*_top15.png & 07_margin_* .png")
print(" Interactives: 08_force_local.html, 09_force_global.html")
✅ Presentation generated.
 Suggested order of figures in PDF:
 01_shap_bar_signed.png → importance + direction (global)
 02_shap_beeswarm.png → distribution per individual (global)
 03_shap_decision_plot.png
 04_shap_waterfall_local.png + DL/local_summary.txt + local_top_contributions.csv
 05_dep_*.png
 06_abs_*_top15.png & 07_margin_*.png
 Interactives: 08_force_local.html, 09_force_global.html
Bias Mitigation Proposal: Reweighing by Race¶
In the baseline model, we observed clear evidence of racial bias:
- The selection rate is much higher for Asian-Pac-Islander (25.4%) and White (20.3%), while groups like Amer-Indian-Eskimo (5.2%) and Other (5.9%) are rarely classified as >50K.
- The True Positive Rate (TPR) is also unequal: Asian-Pac-Islander (0.66) and White (0.59) have much higher recall compared to Other (0.25) or Amer-Indian-Eskimo (0.38).
- The Equal Opportunity Difference (0.41) highlights a large gap between groups, meaning the model is significantly less effective at recognizing high-income individuals from minority groups.
Why Reweighing?¶
We chose Reweighing (Kamiran & Calders, 2012) as a mitigation strategy because:
- It adjusts the weights of training samples according to the joint distribution of the sensitive attribute (race) and the target (income); a worked numeric sketch is shown below.
- This balances the importance of underrepresented racial groups so that the model does not simply learn “being White or Asian-Pac-Islander → higher income”.
- It is a preprocessing method: it works before training and is compatible with our existing pipeline without changing the classifier.
Expected Effect¶
- Minority groups with fewer positive samples (>50K) will receive greater weight during training, improving their chances of being recognized correctly.
- The gap in selection rate and TPR between majority (White, Asian-Pac-Islander) and minority (Other, Amer-Indian-Eskimo, Black) groups should decrease.
- Although global accuracy may slightly decrease, the model will become more equitable across racial groups.
In summary, reweighing by race directly addresses the strongest bias identified in the dataset:
✔ Targets underrepresentation of minorities.
✔ Improves fairness without altering the model architecture.
✔ Enables a transparent before/after comparison with fairness metrics.
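As a concrete illustration of the reweighing formula w(a, y) = P(A=a) * P(Y=y) / P(A=a, Y=y), the weight assigned to each (race, income) combination can be inspected on the training split. This is only a sketch (it assumes X_train and y_train from the baseline cell; the actual mitigation uses the compute_reweighing_weights helper defined further down):

# Sketch: reweighing weights per (race, income) cell; underrepresented combinations receive weights > 1
tmp = pd.DataFrame({"race": X_train["race"].values, "income": y_train.values})
p_a = tmp["race"].value_counts(normalize=True)                # P(A=a)
p_y = tmp["income"].value_counts(normalize=True)              # P(Y=y)
p_ay = tmp.value_counts(["race", "income"], normalize=True)   # P(A=a, Y=y)

rows = []
for a in p_a.index:
    for yv in p_y.index:
        joint = p_ay.get((a, yv), 0.0)
        w = (p_a[a] * p_y[yv]) / joint if joint > 0 else float("nan")
        rows.append({"race": a, "income": yv, "weight": round(w, 3)})

print(pd.DataFrame(rows).pivot(index="race", columns="income", values="weight"))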
# ===========================================
# Model Comparator (baseline vs debiased)
# ===========================================
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
def _overall_metrics(y_true, y_pred, pos_label=">50K"):
"""Global metrics summarized in a dict."""
return {
"accuracy": accuracy_score(y_true, y_pred),
"precision_pos": precision_score(y_true, y_pred, pos_label=pos_label, zero_division=0),
"recall_pos": recall_score(y_true, y_pred, pos_label=pos_label, zero_division=0),
"f1_pos": f1_score(y_true, y_pred, pos_label=pos_label, zero_division=0),
}
def _bar_side_by_side(ax, left_vals, right_vals, labels, title, ylabel, legend=("Baseline","Debiased")):
"""Side-by-side bar chart for quick comparison."""
x = np.arange(len(labels))
w = 0.38
ax.bar(x - w/2, left_vals, width=w, label=legend[0])
ax.bar(x + w/2, right_vals, width=w, label=legend[1])
ax.set_xticks(x, labels, rotation=45, ha="right")
ax.set_title(title)
ax.set_ylabel(ylabel)
ax.legend()
ax.grid(axis="y", alpha=0.3)
def compare_models(
baseline_model,
debiased_model,
X_test,
y_test,
sensitive_attributes=("sex","race"),
pos_label=">50K",
fig_dir=FIG_DIR,
dl_dir=DL_DIR,
tag_a="baseline",
tag_b="debiased"
):
"""
Compare baseline_model vs debiased_model in:
- global metrics
- group metrics (accuracy/precision/recall/f1)
- fairness rates (selection_rate, TPR, FPR, disparate impact, equal opportunity diff)
"""
cmp_name = f"compare_{tag_a}_vs_{tag_b}"
out_fig = Path(fig_dir) / cmp_name
out_dl = Path(dl_dir) / cmp_name
out_fig.mkdir(parents=True, exist_ok=True)
out_dl.mkdir(parents=True, exist_ok=True)
# ------------------ Predictions ------------------
y_pred_a = baseline_model.predict(X_test)
y_pred_b = debiased_model.predict(X_test)
# ------------------ Global metrics ------------------
glob_a = _overall_metrics(y_test, y_pred_a, pos_label)
glob_b = _overall_metrics(y_test, y_pred_b, pos_label)
df_global = pd.DataFrame([glob_a, glob_b], index=[tag_a, tag_b])
df_global.to_csv(out_dl / "global_metrics.csv")
print("== Global metrics ==")
display(df_global)
# Confusion matrix
for tag, yhat in [(tag_a, y_pred_a), (tag_b, y_pred_b)]:
cm = confusion_matrix(y_test, yhat, labels=[pos_label, "<=50K"])
df_cm = pd.DataFrame(
cm,
index=[f"true_{pos_label}", "true_<=50K"],
columns=[f"pred_{pos_label}", "pred_<=50K"]
)
df_cm.to_csv(out_dl / f"confusion_matrix_{tag}.csv")
plt.figure(figsize=(4.5, 4))
plt.imshow(df_cm, cmap="Blues")
plt.title(f"Confusion Matrix – {tag}")
plt.xticks(range(2), df_cm.columns, rotation=15)
plt.yticks(range(2), df_cm.index)
for (i,j), v in np.ndenumerate(cm):
plt.text(j, i, int(v), ha="center", va="center")
plt.tight_layout()
plt.savefig(out_fig / f"confusion_matrix_{tag}.png", dpi=150)
plt.close()
# ------------------ By sensitive attribute ------------------
for attr in sensitive_attributes:
if attr not in X_test.columns:
print(f"(skip) '{attr}' not found in X_test.columns")
continue
# Group metrics
m_a = metrics_by_group(X_test, y_test, y_pred_a, attr)
m_b = metrics_by_group(X_test, y_test, y_pred_b, attr)
m_a.to_csv(out_dl / f"{attr}_metrics_{tag_a}.csv")
m_b.to_csv(out_dl / f"{attr}_metrics_{tag_b}.csv")
print(f"\n== {attr.upper()} – metrics by group ==")
print(f"[{tag_a}]"); display(m_a)
print(f"[{tag_b}]"); display(m_b)
# Fairness rates
r_a, ref_a, eq_a = rates_and_fairness(X_test, y_test, y_pred_a, attr)
r_b, ref_b, eq_b = rates_and_fairness(X_test, y_test, y_pred_b, attr)
r_a.to_csv(out_dl / f"{attr}_rates_{tag_a}.csv")
r_b.to_csv(out_dl / f"{attr}_rates_{tag_b}.csv")
print(f"Rates & fairness – {attr} [{tag_a}] ref={ref_a} | EOD={eq_a:.4f}")
display(r_a)
print(f"Rates & fairness – {attr} [{tag_b}] ref={ref_b} | EOD={eq_b:.4f}")
display(r_b)
# --------- Side-by-side bar plots: selection_rate, TPR, FPR ---------
common_index = sorted(set(r_a.index).intersection(set(r_b.index)))
if len(common_index) == 0:
continue
def _vals(df, col):
return df.loc[common_index, col].values if col in df.columns else np.array([np.nan]*len(common_index))
labels = [str(x) for x in common_index]
for col, nice in [("selection_rate","Selection Rate"),
("TPR","TPR (Recall on positives)"),
("FPR","FPR (False Positive Rate)")]:
fig, ax = plt.subplots(figsize=(8,5))
_bar_side_by_side(
ax,
_vals(r_a, col),
_vals(r_b, col),
labels,
title=f"{nice} by {attr}",
ylabel=col,
legend=(tag_a, tag_b)
)
plt.tight_layout()
plt.savefig(out_fig / f"{attr}_{col}_compare.png", dpi=150)
plt.close()
# --------- Fairness summary table ---------
sum_a = r_a[["selection_rate","TPR","FPR","disparate_impact_vs_max"]].copy()
sum_b = r_b[["selection_rate","TPR","FPR","disparate_impact_vs_max"]].copy()
sum_a.columns = [f"{c}_{tag_a}" for c in sum_a.columns]
sum_b.columns = [f"{c}_{tag_b}" for c in sum_b.columns]
df_sum = pd.concat([sum_a, sum_b], axis=1).loc[common_index]
df_sum["equal_opp_diff_"+tag_a] = eq_a
df_sum["equal_opp_diff_"+tag_b] = eq_b
df_sum.to_csv(out_dl / f"{attr}_fairness_summary_compare.csv")
print("\n✅ Comparison generated.")
print(f"Figures in: {out_fig}")
print(f"Tables/CSVs in: {out_dl}")
return {
"global_metrics": df_global,
"paths": {"fig_dir": str(out_fig), "dl_dir": str(out_dl)}
}
# =========================================================
# Bias mitigation via Reweighing (by sensitive attribute) + retrain model
# =========================================================
import numpy as np
import pandas as pd
from sklearn.base import clone
POS_LABEL = ">50K"
def compute_reweighing_weights(X: pd.DataFrame, y: pd.Series, sensitive_attr: str, pos_label: str = POS_LABEL):
"""
Implements Reweighing (Kamiran & Calders).
Weight for each (A=a, Y=y) is: w(a,y) = P(A=a)*P(Y=y) / P(A=a, Y=y)
Returns a numpy array of sample weights aligned with X.index / y.index.
"""
df = pd.DataFrame({sensitive_attr: X[sensitive_attr].values, "y": y.values})
# probabilities
pA = df[sensitive_attr].value_counts(normalize=True) # P(A=a)
pY = df["y"].value_counts(normalize=True) # P(Y=y)
pAY = df.value_counts([sensitive_attr, "y"], normalize=True) # P(A=a, Y=y)
# safe lookup
groups = df[sensitive_attr].dropna().unique()
labels = df["y"].dropna().unique()
eps = 1e-9
weights = []
for a, yv in zip(df[sensitive_attr].values, df["y"].values):
pa = pA.get(a, 0.0)
py = pY.get(yv, 0.0)
pay = pAY.get((a, yv), 0.0)
w = (pa * py) / max(pay, eps) if pa > 0 and py > 0 else 1.0
weights.append(w)
return np.asarray(weights, dtype=float)
def train_reweighed_logreg(
base_pipeline,
X_train: pd.DataFrame,
y_train: pd.Series,
sensitive_attr: str = "sex",
pos_label: str = POS_LABEL,
save_path: str = "income_model_debiased_reweigh_sex.pkl"
):
# 1) compute sample weights using train
sample_w = compute_reweighing_weights(X_train, y_train, sensitive_attr, pos_label)
# 2) clone and fit
debiased_model = clone(base_pipeline)
# Pipeline forwards sample_weight to the last step if argument is named 'classifier__sample_weight'
debiased_model.fit(X_train, y_train, classifier__sample_weight=sample_w)
try:
joblib.dump(debiased_model, save_path)
print(f"✅ Debiased model (reweigh by '{sensitive_attr}') saved to: {save_path}")
except Exception as e:
print("Note: model not saved due to environment error:", e)
# sanity check
print(f"Reweighing performed on sensitive attribute: '{sensitive_attr}'.")
return debiased_model
# =========================================================
# Train debiased model (reweighing by race) and compare
# =========================================================
model_debiased_race = train_reweighed_logreg(
base_pipeline=model,
X_train=X_train,
y_train=y_train,
sensitive_attr="race",
save_path="income_model_debiased_reweigh_race.pkl"
)
results_race = compare_models(
baseline_model=model,
debiased_model=model_debiased_race,
X_test=X_test,
y_test=y_test,
sensitive_attributes=("race",),
pos_label=POS_LABEL,
tag_a="baseline",
tag_b="debiased_reweigh_race"
)
✅ Debiased model (reweigh by 'race') saved to: income_model_debiased_reweigh_race.pkl
Reweighing performed on sensitive attribute: 'race'.
== Global metrics ==
accuracy | precision_pos | recall_pos | f1_pos | |
---|---|---|---|---|
baseline | 0.851674 | 0.744337 | 0.585987 | 0.655738 |
debiased_reweigh_race | 0.849422 | 0.740479 | 0.577919 | 0.649177 |
== RACE – metrics by group ==
[baseline]
accuracy | precision | recall | f1 | |
---|---|---|---|---|
Amer-Indian-Eskimo | 0.927083 | 0.600000 | 0.375000 | 0.461538 |
Asian-Pac-Islander | 0.833333 | 0.703704 | 0.662791 | 0.682635 |
Black | 0.904311 | 0.702703 | 0.429752 | 0.533333 |
Other | 0.880597 | 0.500000 | 0.250000 | 0.333333 |
White | 0.845268 | 0.749112 | 0.593809 | 0.662480 |
[debiased_reweigh_race]
accuracy | precision | recall | f1 | |
---|---|---|---|---|
Amer-Indian-Eskimo | 0.895833 | 0.400000 | 0.500000 | 0.444444 |
Asian-Pac-Islander | 0.830189 | 0.700000 | 0.651163 | 0.674699 |
Black | 0.898002 | 0.593750 | 0.628099 | 0.610442 |
Other | 0.925373 | 0.666667 | 0.750000 | 0.705882 |
White | 0.843469 | 0.756673 | 0.571764 | 0.651349 |
Rates & fairness – race [baseline] ref=Asian-Pac-Islander | EOD=0.4128
selection_rate | TPR | FPR | disparate_impact_vs_max | |
---|---|---|---|---|
group | ||||
Amer-Indian-Eskimo | 0.052083 | 0.375000 | 0.022727 | 0.204475 |
Asian-Pac-Islander | 0.254717 | 0.662791 | 0.103448 | 1.000000 |
Black | 0.077813 | 0.429752 | 0.026506 | 0.305487 |
Other | 0.059701 | 0.250000 | 0.033898 | 0.234384 |
White | 0.202711 | 0.593809 | 0.068332 | 0.795828 |
Rates & fairness – race [debiased_reweigh_race] ref=Asian-Pac-Islander | EOD=0.2500
selection_rate | TPR | FPR | disparate_impact_vs_max | |
---|---|---|---|---|
group | ||||
Amer-Indian-Eskimo | 0.104167 | 0.500000 | 0.068182 | 0.414062 |
Asian-Pac-Islander | 0.251572 | 0.651163 | 0.103448 | 1.000000 |
Black | 0.134595 | 0.628099 | 0.062651 | 0.535016 |
Other | 0.134328 | 0.750000 | 0.050847 | 0.533955 |
White | 0.193235 | 0.571764 | 0.063175 | 0.768109 |
✅ Comparison generated.
Figures in: docs/assets/figures/compare_baseline_vs_debiased_reweigh_race
Tables/CSVs in: docs/assets/downloads/compare_baseline_vs_debiased_reweigh_race
Why Also Evaluate Sex?¶
Although the main mitigation applied in this notebook focuses on race, we also evaluate fairness metrics by sex.
The baseline model showed that:
- Selection rate: Men are classified as >50K much more often (24.8%) compared to women (7.2%).
- True Positive Rate (TPR): The model correctly identifies high-income men at 0.60, but only 0.50 for women.
- False Positive Rate (FPR): Men are more often overclassified as >50K (0.093 vs 0.017 for women).
These disparities reveal a gender bias: the model favors men in positive classifications, reinforcing the stereotype “being male → higher income”.
Even though we apply reweighing specifically to address racial bias, evaluating the model also by sex is important because:
- It helps us verify that the mitigation for race does not unintentionally worsen gender fairness.
- It provides a broader perspective on model fairness across different sensitive dimensions.
- Both race and sex are legally and ethically recognized as protected attributes in Responsible AI frameworks.
In summary, analyzing sex alongside race ensures a more complete fairness evaluation and demonstrates the trade-offs that bias mitigation can introduce across different sensitive attributes.
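Before retraining with sex-based weights, that cross-check can be run directly by slicing the race-reweighed model's predictions by sex, reusing the comparison helper defined above. A short sketch (it assumes model, model_debiased_race, and compare_models from the previous cells):

# Sketch: does reweighing by race change the fairness picture when sliced by sex?
results_race_by_sex = compare_models(
    baseline_model=model,
    debiased_model=model_debiased_race,
    X_test=X_test,
    y_test=y_test,
    sensitive_attributes=("sex",),
    pos_label=POS_LABEL,
    tag_a="baseline",
    tag_b="debiased_reweigh_race",
)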
# ======================
# 1) Baseline vs Reweighing by SEX
# ======================
model_debiased_sex = train_reweighed_logreg(
base_pipeline=model,
X_train=X_train,
y_train=y_train,
sensitive_attr="sex",
save_path="income_model_debiased_reweigh_sex.pkl"
)
results_sex = compare_models(
baseline_model=model,
debiased_model=model_debiased_sex,
X_test=X_test,
y_test=y_test,
sensitive_attributes=("sex",),
pos_label=POS_LABEL,
tag_a="baseline",
tag_b="debiased_reweigh_sex"
)
✅ Debiased model (reweigh by 'sex') saved to: income_model_debiased_reweigh_sex.pkl
Reweighing performed on sensitive attribute: 'sex'.
== Global metrics ==
accuracy | precision_pos | recall_pos | f1_pos | |
---|---|---|---|---|
baseline | 0.851674 | 0.744337 | 0.585987 | 0.655738 |
debiased_reweigh_sex | 0.845327 | 0.746208 | 0.543100 | 0.628656 |
== SEX – metrics by group ==
[baseline]
accuracy | precision | recall | f1 | |
---|---|---|---|---|
Female | 0.92824 | 0.785408 | 0.501370 | 0.612040 |
Male | 0.81380 | 0.738433 | 0.601508 | 0.662974 |
[debiased_reweigh_sex]
accuracy | precision | recall | f1 | |
---|---|---|---|---|
Female | 0.918961 | 0.629073 | 0.687671 | 0.657068 |
Male | 0.808905 | 0.781749 | 0.516583 | 0.622088 |
Rates & fairness – sex [baseline] ref=Male | EOD=0.1001
selection_rate | TPR | FPR | disparate_impact_vs_max | |
---|---|---|---|---|
group | ||||
Female | 0.072069 | 0.501370 | 0.017434 | 0.290589 |
Male | 0.248011 | 0.601508 | 0.093269 | 1.000000 |
Rates & fairness – sex [debiased_reweigh_sex] ref=Male | EOD=0.1711
selection_rate | TPR | FPR | disparate_impact_vs_max | |
---|---|---|---|---|
group | ||||
Female | 0.123415 | 0.687671 | 0.051604 | 0.613414 |
Male | 0.201193 | 0.516583 | 0.063132 | 1.000000 |
✅ Comparison generated.
Figures in: docs/assets/figures/compare_baseline_vs_debiased_reweigh_sex
Tables/CSVs in: docs/assets/downloads/compare_baseline_vs_debiased_reweigh_sex