CIFAR-10 vs GTSRB DNN uncertainty estimation using Monte Carlo Dropout#
This Jupyter notebook explores predictive uncertainty estimation in deep neural networks (DNNs) using Monte Carlo Dropout (MCD). Specifically, we investigate how a model trained on the CIFAR-10 dataset behaves when exposed to both:
In-distribution (InD) samples (CIFAR-10)
Out-of-distribution (OoD) samples (GTSRB — German Traffic Sign Recognition Benchmark)
By measuring and comparing the entropy of the model’s predictions on both datasets, we aim to:
Understand how confident the model is on familiar (CIFAR-10) vs. unfamiliar (GTSRB) data
Evaluate how well Monte Carlo Dropout captures epistemic (model) uncertainty
Highlight differences in predictive behavior between InD and OoD inputs
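Concretely, each input is passed through the network $T$ times with dropout active; the per-pass softmax outputs $p^{(t)}$ are averaged and the predictive entropy of the mean is reported:

$$\bar{p}_c = \frac{1}{T}\sum_{t=1}^{T} p_c^{(t)}, \qquad H[\bar{p}] = -\sum_{c} \bar{p}_c \log \bar{p}_c$$

Higher entropy signals lower confidence; this is exactly what the custom_entropy and compute_entropy functions in Section VI compute.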
I. Import libraries#
[ ]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from torchvision.datasets import CIFAR10, GTSRB
import aidge_core
import aidge_onnx
II. Import the pre-trained model#
[ ]:
aidge_model = aidge_onnx.load_onnx(
"./examples/tutorials/Dropout_custom_implementation/ONNX_files/CustomResNet18_cleaned.onnx"
)
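As a quick sanity check on the import, you can count the Dropout nodes in the graph. A minimal sketch: get_nodes() is assumed here, mirroring the get_node()/get_output_nodes() accessors used later in this notebook.

[ ]:
# Sanity check (assumed GraphView API): count the Dropout nodes in the graph
nodes = aidge_model.get_nodes()
dropout_nodes = [n for n in nodes if "Dropout" in n.type()]
print(f"Graph has {len(nodes)} nodes, including {len(dropout_nodes)} Dropout node(s).")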
III. Load the NumPy arrays from the .npy files for CIFAR-10 (InD) and GTSRB (OoD)#
[ ]:
# Fix random seed for reproducibility
torch.manual_seed(42)
# Base directory to save .npz files
base_output_dir = "./examples/tutorials/Dropout_custom_implementation/NumPy_images"
# Dataset configurations for the 1000- and 3000-image subsets
datasets_info = {
"CIFAR-10-1000": {
"dataset_class": CIFAR10,
"root": "./examples/tutorials/Dropout_custom_implementation/Datasets/cifar10",
"train": False,
"num_images": 1000,
"start_index": 0,
"output_file": os.path.join(
base_output_dir, "CIFAR-10", "cifar10_subset_1000.npz"
),
},
"CIFAR-10-3000": {
"dataset_class": CIFAR10,
"root": "./examples/tutorials/Dropout_custom_implementation/Datasets/cifar10",
"train": False,
"num_images": 3000,
"start_index": 1000,
"output_file": os.path.join(
base_output_dir, "CIFAR-10", "cifar10_subset_3000.npz"
),
},
"GTSRB-1000": {
"dataset_class": GTSRB,
"root": "./examples/tutorials/Dropout_custom_implementation/Datasets/gtsrb",
"split": "test", # <-- Changed from 'val' to 'test'
"num_images": 1000,
"start_index": 0,
"output_file": os.path.join(base_output_dir, "GTSRB", "gtsrb_subset_1000.npz"),
},
"GTSRB-3000": {
"dataset_class": GTSRB,
"root": "./examples/tutorials/Dropout_custom_implementation/Datasets/gtsrb",
"split": "test", # <-- Changed from 'val' to 'test'
"num_images": 3000,
"start_index": 1000,
"output_file": os.path.join(base_output_dir, "GTSRB", "gtsrb_subset_3000.npz"),
},
}
# Image transform: Resize + Tensor
transform = transforms.Compose(
[
transforms.Resize((128, 128)),
transforms.ToTensor(),
]
)
# Dataset extraction and saving function
def extract_and_save_subset(name, info):
os.makedirs(os.path.dirname(info["output_file"]), exist_ok=True)
kwargs = {"root": info["root"], "transform": transform, "download": False}
    # Pass the split argument each dataset class expects, keyed on the name prefix
if name.startswith("CIFAR-10"):
kwargs["train"] = info.get("train", False)
elif name.startswith("GTSRB"):
kwargs["split"] = info.get("split", "test")
else:
raise ValueError(f"Dataset {name} not supported")
# Load the dataset
dataset = info["dataset_class"](**kwargs)
# Collect valid samples
images = []
labels = []
collected = 0
start = info.get("start_index", 0)
end = start + info["num_images"]
i = start
while collected < info["num_images"] and i < len(dataset):
image, label = dataset[i]
i += 1
if label is None:
print(f"⚠️ Skipping index {i-1} in {name} due to missing label.")
continue
images.append(image.numpy())
labels.append(label)
collected += 1
# Select appropriate label dtype
label_dtype = np.uint8 if max(labels) <= 255 else np.int32
# Save to .npz file
np.savez(
info["output_file"],
images=np.stack(images),
labels=np.array(labels, dtype=label_dtype),
)
print(f"✅ {name}: {collected} images saved in {info['output_file']}")
# Run the extraction for all dataset subsets
for dataset_name, dataset_info in datasets_info.items():
extract_and_save_subset(dataset_name, dataset_info)
# Display saved dataset stats
def print_dataset_info(name, data):
images = data["images"]
labels = data["labels"]
print(f"\n--- {name} Dataset Info ---")
print(f"Total number of images : {images.shape[0]}")
print(f"Shape of an image : {images.shape[1:]} (C, H, W)")
print(f"Labels shape : {labels.shape}")
unique_labels = np.unique(labels)
print(f"Number of classes : {len(unique_labels)}")
print(f"Present classes : {unique_labels}")
print("Labels distribution:")
for lbl in unique_labels:
print(f" Label {lbl} : {(labels == lbl).sum()} images")
# Load and print dataset info
for dataset_name, dataset_info in datasets_info.items():
data = np.load(dataset_info["output_file"])
print_dataset_info(dataset_name, data)
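To eyeball what was saved, here is a minimal preview using only NumPy and Matplotlib (both imported above); at this stage pixel values are still in [0, 1]:

[ ]:
# Preview the first 4 saved CIFAR-10 images
data = np.load(datasets_info["CIFAR-10-1000"]["output_file"])
fig, axs = plt.subplots(1, 4, figsize=(10, 3))
for ax, img, lbl in zip(axs, data["images"][:4], data["labels"][:4]):
    ax.imshow(np.transpose(img, (1, 2, 0)))  # (C, H, W) -> (H, W, C)
    ax.set_title(f"label {lbl}")
    ax.axis("off")
plt.show()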
IV. Data preprocessing#
1. Normalize CIFAR-10 and GTSRB NumPy images#
[ ]:
# Per-channel mean and std (CIFAR-10 training statistics, shaped (C, 1, 1) to
# broadcast over (N, C, H, W) batches); the same statistics are applied to
# GTSRB because the model was trained on CIFAR-10
mean = np.array([0.4914, 0.4822, 0.4465])[:, None, None]
std = np.array([0.2023, 0.1994, 0.2010])[:, None, None]
def normalize_images_vectorized(images, mean, std):
return ((images - mean) / std).astype(np.float32)
# Paths to dataset subsets
subset_paths = {
"CIFAR-10-1000": "./examples/tutorials/Dropout_custom_implementation/NumPy_images/CIFAR-10/cifar10_subset_1000.npz",
"CIFAR-10-3000": "./examples/tutorials/Dropout_custom_implementation/NumPy_images/CIFAR-10/cifar10_subset_3000.npz",
"GTSRB-1000": "./examples/tutorials/Dropout_custom_implementation/NumPy_images/GTSRB/gtsrb_subset_1000.npz",
"GTSRB-3000": "./examples/tutorials/Dropout_custom_implementation/NumPy_images/GTSRB/gtsrb_subset_3000.npz",
}
# Process each subset
for name, path in subset_paths.items():
data = np.load(path)
images = data["images"]
labels = data["labels"]
# Normalize images
normalized_images = normalize_images_vectorized(images, mean, std)
    # Save the normalized version as a new file alongside the original
normalized_path = path.replace(".npz", "_normalized.npz")
np.savez(normalized_path, images=normalized_images, labels=labels)
# Verification print
print(f"✅ {name}: normalized and saved to {normalized_path}")
print(f" Shape: {normalized_images.shape}, dtype: {normalized_images.dtype}")
2. Convert the normalized NumPy images to Aidge tensors (CIFAR-10 & GTSRB)#
[ ]:
# Batch conversion function
def convert_batch_to_aidge_tensor(normalized_images):
return aidge_core.Tensor(normalized_images)
# Paths to normalized .npz files
normalized_paths = {
"CIFAR-10-1000": "./examples/tutorials/Dropout_custom_implementation/NumPy_images/CIFAR-10/cifar10_subset_1000_normalized.npz",
"CIFAR-10-3000": "./examples/tutorials/Dropout_custom_implementation/NumPy_images/CIFAR-10/cifar10_subset_3000_normalized.npz",
"GTSRB-1000": "./examples/tutorials/Dropout_custom_implementation/NumPy_images/GTSRB/gtsrb_subset_1000_normalized.npz",
"GTSRB-3000": "./examples/tutorials/Dropout_custom_implementation/NumPy_images/GTSRB/gtsrb_subset_3000_normalized.npz",
}
# Dictionary to store Aidge tensors
aidge_tensors = {}
# Convert each dataset
for name, path in normalized_paths.items():
data = np.load(path)
normalized_images = data["images"]
aidge_tensor = convert_batch_to_aidge_tensor(normalized_images)
aidge_tensors[name] = aidge_tensor
    # Verification
    dims = aidge_tensor.dims() if hasattr(aidge_tensor, "dims") else "N/A"
    print(f"✅ {name}: converted to Aidge tensor with dims {dims}")
V. Model deployment#
[ ]:
# Configure the Aidge model for inference
aidge_model.set_datatype(aidge_core.dtype.float32)
aidge_model.set_backend("cpu")
scheduler = aidge_core.SequentialScheduler(aidge_model)
# Create and add an input node to the model graph
input_node = aidge_core.Producer([1, 3, 128, 128], "DataProvider")
input_node.add_child(aidge_model, 0, aidge_model.get_ordered_inputs()[0])
aidge_model.add(input_node)
# Verify the input node was added
in_node = aidge_model.get_node("DataProvider")
print(in_node.name())
print(in_node.type())
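Before the full MCD loop, a one-off forward pass on random data confirms the deployed graph executes. This sketch reuses the set_output / forward / get_output pattern from Section VI; the expected (1, 10) output shape is an assumption based on the ten CIFAR-10 classes:

[ ]:
# Smoke test: one forward pass through the deployed graph on a random batch
dummy = aidge_core.Tensor(np.random.rand(1, 3, 128, 128).astype(np.float32))
input_node.get_operator().set_output(0, dummy)
scheduler.forward()
out_node = list(aidge_model.get_output_nodes())[-1]
logits = np.array(out_node.get_operator().get_output(0))
print("output shape:", logits.shape)  # expected: (1, 10) CIFAR-10 logits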
VI. Uncertainty estimation using Monte Carlo Dropout#
1. Class label definitions#
[ ]:
# Define CIFAR-10 dataset class names
cifar_classes = [
"airplane",
"automobile",
"bird",
"cat",
"deer",
"dog",
"frog",
"horse",
"ship",
"truck",
]
# Define GTSRB dataset class names
gtsrb_classes = [
"Speed limit (20km/h)",
"Speed limit (30km/h)",
"Speed limit (50km/h)",
"Speed limit (60km/h)",
"Speed limit (70km/h)",
"Speed limit (80km/h)",
"End of speed limit (80km/h)",
"Speed limit (100km/h)",
"Speed limit (120km/h)",
"No overtaking",
"No overtaking (trucks)",
"Priority at next intersection",
"Priority road",
"Give way",
"Stop",
"No vehicles",
"No vehicles (trucks)",
"No entry",
"General caution",
"Dangerous curve left",
"Dangerous curve right",
"Double curve",
"Bumpy road",
"Slippery road",
"Road narrows on the right",
"Road work",
"Traffic signals",
"Pedestrians",
"Children crossing",
"Bicycles crossing",
"Beware of ice/snow",
"Wild animals crossing",
"End of all speed and passing limits",
"Turn right ahead",
"Turn left ahead",
"Ahead only",
"Go straight or right",
"Go straight or left",
"Keep right",
"Keep left",
"Roundabout mandatory",
"End of no overtaking",
"End of no overtaking (trucks)",
]
2. Enable dropout at inference#
[ ]:
def enable_dropout(model):
    """Keep dropout layers active at test time (standard PyTorch pattern).

    Note: modules()/train() are torch.nn.Module methods; the custom Aidge
    Dropout node in this tutorial is expected to remain stochastic at
    inference, so this helper is for reference and is not called below.
    """
    for m in model.modules():
        if m.__class__.__name__.startswith("Dropout"):
            m.train()  # set dropout layers back to train mode
3. Define the softmax function#
[ ]:
def my_softmax(x):
"""Compute softmax values for each set of scores in x"""
x = np.squeeze(x)
e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
return e_x / np.sum(e_x, axis=-1, keepdims=True)
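A quick check that the max-subtraction makes the function numerically stable on large logits (plain NumPy example):

[ ]:
# Stable softmax on large logits: no overflow, probabilities sum to 1
logits = np.array([[1000.0, 1001.0, 1002.0]])
probs = my_softmax(logits)
print(probs, probs.sum())  # ≈ [0.0900, 0.2447, 0.6652], sum = 1.0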
4. Set the number of Monte Carlo Dropout forward samples#
[ ]:
num_mcd_samples = 30
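# Trade-off: more samples give a smoother estimate of the predictive
# distribution, but inference cost grows linearly with num_mcd_samples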
5. Compute CIFAR-10 vs GTSRB entropy#
[ ]:
# Custom Entropy Functions
def custom_entropy(prob_dist, eps=1e-12):
"""
Custom entropy calculation.
prob_dist: 1D array of probabilities summing to 1
"""
prob_dist = np.clip(prob_dist, eps, 1.0)
return -np.sum(prob_dist * np.log(prob_dist))
def compute_entropy(mcd_prob_samples):
"""
mcd_prob_samples: numpy array shape (num_mcd_samples, num_classes)
Returns: predictive_entropy
"""
mean_prob = np.mean(mcd_prob_samples, axis=0)
predictive_entropy = custom_entropy(mean_prob)
return predictive_entropy
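# Worked example (illustrative): a uniform distribution over 10 classes attains
# the maximum entropy log(10) ≈ 2.303 nats, while a one-hot prediction is ~0,
# so higher predictive entropy means lower confidence
assert np.isclose(custom_entropy(np.full(10, 0.1)), np.log(10))
assert custom_entropy(np.eye(10)[0]) < 1e-8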
def run_mcd_and_collect_uncertainties(
model,
scheduler,
batch_tensor,
input_node,
num_mcd_samples,
dataset_name,
class_names,
):
input_node.get_operator().set_output(0, batch_tensor)
outputs = []
output_nodes = model.get_output_nodes()
output_node = list(output_nodes)[-1]
for _ in range(num_mcd_samples):
scheduler.forward()
output_aidge = output_node.get_operator().get_output(0)
output_array = np.array(output_aidge)
if output_array.ndim == 1:
output_array = output_array[np.newaxis, :]
        outputs.append(output_array)  # one stochastic forward pass per MCD sample
outputs = np.stack(outputs) # (num_mcd_samples, batch_size, num_classes)
if outputs.shape[0] != num_mcd_samples:
raise ValueError(
f"Expected {num_mcd_samples} MCD samples, but got {outputs.shape[0]}"
)
else:
print(f"✅ Each image was passed through the model {num_mcd_samples} times.")
# Apply softmax to each MCD sample output
softmax_outputs = np.array([my_softmax(output) for output in outputs])
# Handle possible 2D case (batch size = 1)
if softmax_outputs.ndim == 2:
softmax_outputs = softmax_outputs[:, np.newaxis, :] # add batch dim
batch_size = softmax_outputs.shape[1]
entropies = []
for i in range(batch_size):
mcd_prob_samples = softmax_outputs[:, i, :]
ent = compute_entropy(mcd_prob_samples)
entropies.append(ent)
mean_prob = np.mean(mcd_prob_samples, axis=0)
pred_class_idx = np.argmax(mean_prob)
pred_class_name = class_names[pred_class_idx]
max_proba = mean_prob[pred_class_idx]
print(
f"[{dataset_name}] Image {i}: predicted class = {pred_class_name} (index {pred_class_idx}) | Proba max = {max_proba:.4f}"
)
return np.array(entropies)
# Helper to get the batch size from an Aidge tensor (with a NumPy fallback)
def get_num_images_from_tensor(tensor):
if hasattr(tensor, "dims"):
return tensor.dims()[0]
else:
return tensor.shape[0]
# Run MCD for all subsets
entropy_results = {}
for subset_size in ["1000", "3000"]:
# CIFAR-10 subset
key_cifar = f"CIFAR-10-{subset_size}"
aidge_batch_tensor_cifar = aidge_tensors[key_cifar]
num_images_cifar = get_num_images_from_tensor(aidge_batch_tensor_cifar)
print(f"\nRunning MCD for {key_cifar} with {num_images_cifar} images...")
entropy_cifar = run_mcd_and_collect_uncertainties(
aidge_model,
scheduler,
aidge_batch_tensor_cifar,
input_node,
num_mcd_samples,
dataset_name=key_cifar,
class_names=cifar_classes,
)
entropy_results[key_cifar] = entropy_cifar
# GTSRB subset
key_gtsrb = f"GTSRB-{subset_size}"
aidge_batch_tensor_gtsrb = aidge_tensors[key_gtsrb]
num_images_gtsrb = get_num_images_from_tensor(aidge_batch_tensor_gtsrb)
print(f"\nRunning MCD for {key_gtsrb} with {num_images_gtsrb} images...")
entropy_gtsrb = run_mcd_and_collect_uncertainties(
aidge_model,
scheduler,
aidge_batch_tensor_gtsrb,
input_node,
num_mcd_samples,
dataset_name=key_gtsrb,
class_names=gtsrb_classes,
)
entropy_results[key_gtsrb] = entropy_gtsrb
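Before plotting, a short numerical summary of each entropy distribution (plain NumPy):

[ ]:
# Numerical summary of the predictive-entropy distributions
for name, ent in entropy_results.items():
    print(
        f"{name}: mean={ent.mean():.4f} | median={np.median(ent):.4f} | "
        f"std={ent.std():.4f} | min={ent.min():.4f} | max={ent.max():.4f}"
    )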
6. Visualize the results#
[ ]:
plot_titles = {
"CIFAR-10-1000": "CIFAR-10 (1000 images)",
"CIFAR-10-3000": "CIFAR-10 (3000 images)",
"GTSRB-1000": "GTSRB (1000 images)",
"GTSRB-3000": "GTSRB (3000 images)",
}
def get_info_string(
num_mcd_samples, num_images_cifar, num_images_gtsrb, ind_split, entropy_func_name
):
return (
f"Num MCD samples = {num_mcd_samples} | "
f"Images number (CIFAR/GTSRB) = ({num_images_cifar}/{num_images_gtsrb}) | "
f"InD split = {ind_split} | "
f"Entropy function = {entropy_func_name}"
)
# Setup
ind_split = "Train" if datasets_info["CIFAR-10-1000"].get("train", False) else "Test"
use_custom_entropy = True
entropy_func_name = "custom_entropy" if use_custom_entropy else "scipy_entropy"
# Create figure and axes
fig, axes = plt.subplots(2, 2, figsize=(16, 14))
# Main title
fig.suptitle(
"Predictive Entropy Comparison Between InD (CIFAR-10) and OoD (GTSRB) Using Monte Carlo Dropout",
fontsize=20,
fontweight="bold",
y=0.98,
)
dataset_pairs = [("CIFAR-10-1000", "GTSRB-1000"), ("CIFAR-10-3000", "GTSRB-3000")]
# Subtitle y-positions for each row
subtitle_ys = [0.90, 0.44]
# Define fixed bins for histogram (optional: adjust based on your entropy range)
bins = np.linspace(0, 1.5, 50)
for i, (cifar_key, gtsrb_key) in enumerate(dataset_pairs):
row = i
entropy_cifar = entropy_results.get(cifar_key, [])
entropy_gtsrb = entropy_results.get(gtsrb_key, [])
num_images_cifar = len(entropy_cifar)
num_images_gtsrb = len(entropy_gtsrb)
# Boxplot with custom median colors
box = axes[row, 0].boxplot(
[entropy_cifar, entropy_gtsrb], labels=["CIFAR-10", "GTSRB"]
)
# Set median line colors to match histogram colors
median_colors = ["skyblue", "salmon"]
for median_line, color in zip(box["medians"], median_colors):
median_line.set_color(color)
median_line.set_linewidth(2)
axes[row, 0].set_title(
f"Boxplot: {plot_titles[cifar_key]} vs {plot_titles[gtsrb_key]}", pad=15
)
axes[row, 0].set_ylabel("Entropy")
# Histogram
axes[row, 1].hist(
entropy_cifar,
bins=bins,
alpha=0.6,
label="CIFAR-10",
color="skyblue",
edgecolor="black",
)
axes[row, 1].hist(
entropy_gtsrb,
bins=bins,
alpha=0.6,
label="GTSRB",
color="salmon",
edgecolor="black",
)
axes[row, 1].set_title(
f"Histogram: {plot_titles[cifar_key]} vs {plot_titles[gtsrb_key]}", pad=15
)
axes[row, 1].set_xlabel("Entropy")
axes[row, 1].set_ylabel("Frequency")
axes[row, 1].legend()
# Subtitle
info_str = get_info_string(
num_mcd_samples,
num_images_cifar,
num_images_gtsrb,
ind_split,
entropy_func_name,
)
fig.text(0.5, subtitle_ys[i], info_str, ha="center", fontsize=12, fontweight="bold")
# Layout adjustment for spacing titles
plt.tight_layout(rect=[0, 0, 1, 0.92], h_pad=6.0)
plt.show()
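To condense the InD/OoD comparison into a single number, one option (an addition, not part of the original flow) is the AUROC of a detector that flags an input as OoD when its predictive entropy exceeds a threshold. A plain-NumPy sketch using the Mann-Whitney rank-sum formulation:

[ ]:
def entropy_auroc(ind_entropy, ood_entropy):
    """AUROC of flagging OoD by thresholding entropy (1.0 = perfect separation).

    Mann-Whitney U formulation; ties are broken arbitrarily.
    """
    scores = np.concatenate([ind_entropy, ood_entropy])
    ranks = scores.argsort().argsort() + 1  # 1-based ranks
    n_ind, n_ood = len(ind_entropy), len(ood_entropy)
    u = ranks[n_ind:].sum() - n_ood * (n_ood + 1) / 2
    return u / (n_ind * n_ood)

for size in ["1000", "3000"]:
    auc = entropy_auroc(
        entropy_results[f"CIFAR-10-{size}"], entropy_results[f"GTSRB-{size}"]
    )
    print(f"Entropy-based OoD AUROC ({size}-image subsets): {auc:.3f}")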