
Model Explainability

The Black Box Problem

Your model predicts a loan should be denied. The customer asks: “Why?” You say: “The neural network decided.” That answer is not acceptable in healthcare, finance, law, and many other domains. We need to explain our models.

Why Explainability Matters

Domain | Why It’s Required
Healthcare | Doctors need to validate AI recommendations
Finance | Regulations require explainable credit decisions
Legal | Right to explanation under GDPR
Hiring | Avoid discrimination and bias
Insurance | Justify pricing decisions

Types of Explainability

Global Explainability

How does the model work overall? What features matter most in general?

Local Explainability

Why did the model make THIS specific prediction? What drove this particular decision?

Method 1: Feature Importance

For Tree-Based Models

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load data
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target
feature_names = cancer.feature_names

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Feature importance
importance = pd.DataFrame({
    'feature': feature_names,
    'importance': model.feature_importances_
}).sort_values('importance', ascending=True)

# Plot
plt.figure(figsize=(10, 12))
plt.barh(importance['feature'], importance['importance'])
plt.xlabel('Importance')
plt.title('Feature Importance (Random Forest)')
plt.tight_layout()
plt.show()

print("Top 5 most important features:")
print(importance.tail(5).to_string(index=False))

For Linear Models

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Scale features for coefficient interpretation
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train logistic regression
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train_scaled, y_train)

# Coefficients
coef_df = pd.DataFrame({
    'feature': feature_names,
    'coefficient': lr.coef_[0]
}).sort_values('coefficient', key=abs, ascending=False)

print("Top 10 features by |coefficient|:")
print(coef_df.head(10).to_string(index=False))

# Visualize
plt.figure(figsize=(10, 12))
sorted_coef = coef_df.sort_values('coefficient')
colors = ['red' if c < 0 else 'green' for c in sorted_coef['coefficient']]
plt.barh(sorted_coef['feature'], sorted_coef['coefficient'], color=colors)
plt.xlabel('Coefficient')
plt.title('Feature Coefficients (Logistic Regression)')
plt.axvline(0, color='black', linewidth=0.5)
plt.tight_layout()
plt.show()

Method 2: Permutation Importance

A model-agnostic method that works for any model:
from sklearn.inspection import permutation_importance

# Calculate permutation importance
perm_importance = permutation_importance(
    model, X_test, y_test, 
    n_repeats=10, 
    random_state=42,
    n_jobs=-1
)

# Create DataFrame
perm_df = pd.DataFrame({
    'feature': feature_names,
    'importance_mean': perm_importance.importances_mean,
    'importance_std': perm_importance.importances_std
}).sort_values('importance_mean', ascending=False)

print("Permutation Importance (Top 10):")
print(perm_df.head(10).to_string(index=False))

# Plot with error bars
plt.figure(figsize=(10, 8))
perm_sorted = perm_df.sort_values('importance_mean', ascending=True).tail(15)
plt.barh(
    perm_sorted['feature'], 
    perm_sorted['importance_mean'],
    xerr=perm_sorted['importance_std']
)
plt.xlabel('Mean Accuracy Decrease')
plt.title('Permutation Importance')
plt.tight_layout()
plt.show()

How permutation importance works (a minimal manual version is sketched after this list):
  1. Baseline: measure model accuracy on the test set
  2. Randomly shuffle one feature’s values
  3. Measure the accuracy drop
  4. A bigger drop means a more important feature
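
To make these steps concrete, here is a minimal hand-rolled version of the same idea, reusing model, X_test, y_test, and feature_names from Method 1. It is a sketch roughly equivalent to permutation_importance with n_repeats=1, not a replacement for it:

from sklearn.metrics import accuracy_score

rng = np.random.default_rng(42)
baseline = accuracy_score(y_test, model.predict(X_test))          # step 1: baseline accuracy

manual_importance = {}
for j, name in enumerate(feature_names):
    X_permuted = X_test.copy()
    X_permuted[:, j] = rng.permutation(X_permuted[:, j])          # step 2: shuffle one feature
    permuted_acc = accuracy_score(y_test, model.predict(X_permuted))  # step 3: re-score
    manual_importance[name] = baseline - permuted_acc             # step 4: accuracy drop

print("Manual permutation importance (top 5):")
for name, drop in sorted(manual_importance.items(), key=lambda kv: kv[1], reverse=True)[:5]:
    print(f"  {name}: {drop:.4f}")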

Method 3: SHAP Values

SHAP (SHapley Additive exPlanations) provides both global and local explanations:
# pip install shap
import shap

# Create explainer
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# For binary classification, the examples below index shap_values[1] for the positive class,
# which assumes SHAP returns a list with one array per class (see the version note below)
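
The layout of shap_values depends on the installed SHAP version: older releases of TreeExplainer return a list with one (n_samples, n_features) array per class, while newer releases may return a single array with a trailing class dimension. If the indexing above fails for you, a small guard like this sketch normalizes both layouts (the helper name positive_class_shap is ours, not part of the SHAP API):

def positive_class_shap(shap_values):
    """Return positive-class SHAP values for either TreeExplainer output layout."""
    if isinstance(shap_values, list):
        return shap_values[1]           # older SHAP: one array per class
    if shap_values.ndim == 3:
        return shap_values[:, :, 1]     # newer SHAP: class is the last axis
    return shap_values                  # already a single (n_samples, n_features) array

# Use wherever this page writes shap_values[1]:
# shap_values_pos = positive_class_shap(shap_values)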

Global: Summary Plot

# Summary plot showing feature importance AND direction
plt.figure(figsize=(12, 8))
shap.summary_plot(shap_values[1], X_test, feature_names=feature_names)

Global: Bar Plot

# Simple bar chart of mean |SHAP| values
plt.figure(figsize=(10, 8))
shap.summary_plot(shap_values[1], X_test, feature_names=feature_names, plot_type="bar")

Local: Individual Prediction Explanation

# Explain a single prediction
sample_idx = 0
sample = X_test[sample_idx:sample_idx+1]

print(f"Actual class: {y_test[sample_idx]}")
print(f"Predicted probability: {model.predict_proba(sample)[0]}")

# Force plot for single prediction
shap.initjs()
shap.force_plot(
    explainer.expected_value[1],
    shap_values[1][sample_idx],
    sample,
    feature_names=feature_names
)

Local: Waterfall Plot

# Waterfall plot showing how features contribute
shap.plots.waterfall(
    shap.Explanation(
        values=shap_values[1][sample_idx],
        base_values=explainer.expected_value[1],
        data=X_test[sample_idx],
        feature_names=feature_names
    )
)

Method 4: LIME (Local Explanations)

LIME (Local Interpretable Model-agnostic Explanations) explains individual predictions by fitting a simple surrogate model that approximates the black-box model in the neighborhood of one sample:
# pip install lime
from lime.lime_tabular import LimeTabularExplainer

# Create LIME explainer
lime_explainer = LimeTabularExplainer(
    X_train,
    feature_names=feature_names,
    class_names=['Malignant', 'Benign'],
    mode='classification'
)

# Explain a single prediction
sample_idx = 0
sample = X_test[sample_idx]

explanation = lime_explainer.explain_instance(
    sample,
    model.predict_proba,
    num_features=10
)

# Show explanation
print(f"Prediction: {model.predict([sample])[0]}")
print(f"Probability: {model.predict_proba([sample])[0]}")
print("\nLIME Explanation:")
for feature, weight in explanation.as_list():
    print(f"  {feature}: {weight:+.4f}")

# Visual explanation
explanation.show_in_notebook()
# Or save as HTML: explanation.save_to_file('explanation.html')
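
To see what “approximating the model locally” means, the sketch below builds a tiny weighted linear surrogate around one sample by hand. It is a simplified illustration of the idea rather than LIME’s actual algorithm (it skips LIME’s feature discretization and selection) and reuses model, X_train, X_test, and feature_names from earlier:

from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
sample = X_test[0]
scale = X_train.std(axis=0)

# 1. Perturb the sample: draw points from a Gaussian centered on it
perturbed = sample + rng.normal(0, scale, size=(500, X_train.shape[1]))

# 2. Query the black-box model for the positive-class probability
target = model.predict_proba(perturbed)[:, 1]

# 3. Weight each perturbation by its proximity to the original sample
distances = np.linalg.norm((perturbed - sample) / scale, axis=1)
weights = np.exp(-(distances ** 2) / 2)

# 4. Fit a weighted linear model: its coefficients are the local explanation
surrogate = Ridge(alpha=1.0)
surrogate.fit(perturbed, target, sample_weight=weights)

print("Local surrogate coefficients (top 5 by |weight|):")
for name, coef in sorted(zip(feature_names, surrogate.coef_), key=lambda kv: abs(kv[1]), reverse=True)[:5]:
    print(f"  {name}: {coef:+.6f}")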

LIME vs SHAP

Aspect | LIME | SHAP
Method | Local linear approximation | Game theory (Shapley values)
Consistency | Can vary between runs | Mathematically consistent
Speed | Fast | Slower for many samples
Global | No (local only) | Yes (aggregate local)
Accuracy | Approximate | Exact (for tree models)

Method 5: Partial Dependence Plots

Partial dependence plots (PDPs) show how a feature affects predictions, on average:
from sklearn.inspection import PartialDependenceDisplay

# Select features to analyze
features_to_plot = [0, 7, 20, 27]  # mean radius, mean concave points, worst radius, worst concave points

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
PartialDependenceDisplay.from_estimator(
    model, X_train, features_to_plot,
    feature_names=feature_names,
    ax=axes.flatten()
)
plt.tight_layout()
plt.show()
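
Under the hood, partial dependence for one feature is computed by forcing that feature to each value on a grid for every row, predicting, and averaging the predictions. A minimal manual version of that averaging step (reusing model, X_train, and feature_names; the sketch averages the positive-class probability):

# Manual partial dependence for a single feature (mean radius, column 0)
feature_idx = 0
grid = np.linspace(X_train[:, feature_idx].min(), X_train[:, feature_idx].max(), 20)

pd_values = []
for value in grid:
    X_modified = X_train.copy()
    X_modified[:, feature_idx] = value                                # fix the feature to this grid value
    pd_values.append(model.predict_proba(X_modified)[:, 1].mean())    # average prediction over all rows

plt.figure(figsize=(8, 5))
plt.plot(grid, pd_values, marker='o')
plt.xlabel(feature_names[feature_idx])
plt.ylabel('Average predicted probability (positive class)')
plt.title('Manual Partial Dependence')
plt.tight_layout()
plt.show()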

2D Interaction Plot

# Show interaction between two features
fig, ax = plt.subplots(figsize=(10, 8))
PartialDependenceDisplay.from_estimator(
    model, X_train, 
    [(0, 7)],  # mean radius vs mean concave points
    feature_names=feature_names,
    ax=ax,
    kind='average'  # 2-way interaction plots only support averaged partial dependence
)
plt.tight_layout()
plt.show()

Method 6: ICE Plots

Individual Conditional Expectation (ICE) plots are like PDPs, but draw one curve per sample instead of only the average:
from sklearn.inspection import PartialDependenceDisplay

fig, ax = plt.subplots(figsize=(10, 6))
PartialDependenceDisplay.from_estimator(
    model, X_train[:100], [0],  # Use subset for clarity
    feature_names=feature_names,
    ax=ax,
    kind='both',  # ICE + PDP
    ice_lines_kw={'color': 'blue', 'alpha': 0.1},
    pd_line_kw={'color': 'red', 'linewidth': 3}
)
ax.set_title('ICE Plot: Individual Conditional Expectation')
plt.tight_layout()
plt.show()

Practical: Explaining a Loan Decision

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
import shap

# Simulated loan data
np.random.seed(42)
n = 1000

loan_data = pd.DataFrame({
    'income': np.random.normal(60000, 20000, n).clip(20000, 200000),
    'debt_ratio': np.random.uniform(0.1, 0.6, n),
    'credit_score': np.random.normal(700, 50, n).clip(500, 850),
    'employment_years': np.random.exponential(5, n).clip(0, 30),
    'loan_amount': np.random.uniform(5000, 50000, n),
    'num_credit_lines': np.random.poisson(3, n),
    'late_payments': np.random.poisson(1, n)
})

# Generate target (approved or not)
approval_prob = (
    0.5 +
    (loan_data['credit_score'] - 700) / 400 +
    (loan_data['income'] - 60000) / 200000 -
    loan_data['debt_ratio'] * 0.5 +
    loan_data['employment_years'] / 50 -
    loan_data['late_payments'] * 0.1
).clip(0.05, 0.95)

loan_data['approved'] = (np.random.random(n) < approval_prob).astype(int)

# Train model
X = loan_data.drop('approved', axis=1)
y = loan_data['approved']

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

# Explain a denied application
denied_idx = loan_data[loan_data['approved'] == 0].index[0]
applicant = X.iloc[denied_idx:denied_idx+1]

print("Applicant Profile:")
print(applicant.T)
print(f"\nPrediction: {'Approved' if model.predict(applicant)[0] else 'Denied'}")
print(f"Approval Probability: {model.predict_proba(applicant)[0][1]:.1%}")

# SHAP explanation
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(applicant)

print("\n=== Explanation ===")
print("Factors contributing to denial (SHAP values):")
for i, (feature, value, shap_val) in enumerate(
    zip(X.columns, applicant.values[0], shap_values[1][0])
):
    direction = "↑" if shap_val > 0 else "↓"
    print(f"  {feature}: {value:.2f}{direction} ({shap_val:+.4f})")

Building an Explanation Report

def generate_explanation_report(model, X, sample_idx, feature_names):
    """Generate a plain-text explanation report for one sample (X is a NumPy array)."""
    sample = X[sample_idx:sample_idx+1]
    
    # Prediction
    prediction = model.predict(sample)[0]
    probability = model.predict_proba(sample)[0]
    
    # SHAP values
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(sample)
    
    report = f"""
    === PREDICTION EXPLANATION REPORT ===
    
    Prediction: {'Positive' if prediction else 'Negative'}
    Confidence: {max(probability):.1%}
    
    === TOP CONTRIBUTING FACTORS ===
    """
    
    # Sort by absolute SHAP value
    contributions = list(zip(feature_names, sample[0], shap_values[1][0]))
    contributions.sort(key=lambda x: abs(x[2]), reverse=True)
    
    for feature, value, contrib in contributions[:5]:
        direction = "pushes toward the positive class" if contrib > 0 else "pushes toward the negative class"
        report += f"\n  {feature} = {value:.2f} ({direction}, {contrib:+.4f})"
    
    return report

# Example
# report = generate_explanation_report(model, X_test, 0, feature_names)
# print(report)

Key Takeaways

Multiple Methods

Use feature importance, SHAP, LIME, and PDPs together

Global vs Local

Global shows patterns, local explains decisions

SHAP Is the Gold Standard

Mathematically grounded, works for any model

Document Explanations

Generate reports for stakeholders

What’s Next?

Now that you can explain your models, let’s learn how to build robust ML pipelines!

Continue to ML Pipelines

Build reproducible, production-ready ML workflows