Statistical validation of the dropout operator using randomization#

Importing libraries#

[ ]:
import numpy as np
import matplotlib.pyplot as plt

import aidge_onnx

Loading the custom dropout implementation ONNX file into Aidge#

[ ]:
# Load the ONNX export of the custom-dropout network ("MarwaNet") into an
# Aidge graph. NOTE(review): path is relative to the notebook's working
# directory — confirm it resolves from where the notebook is launched.
aidge_model = aidge_onnx.load_onnx(
    "./examples/tutorials/Dropout_custom_implementation/ONNX_files/MarwaNet.onnx"
)

Dropout statistical validation using Chernoff bounds#

[ ]:
# Function to apply dropout
def apply_dropout(input_tensor, dropout_rate=0.5):
    """Apply inverted dropout to a tensor-like input.

    Each element is independently kept with probability ``1 - dropout_rate``
    and survivors are rescaled by ``1 / (1 - dropout_rate)`` so the expected
    value of the output equals the input.

    Args:
        input_tensor: Array-like input (anything ``np.asarray`` accepts).
        dropout_rate: Probability of zeroing each element; must be in [0, 1).

    Returns:
        ``np.ndarray`` with the same shape as the input, dropout applied.

    Raises:
        ValueError: If ``dropout_rate`` is outside [0, 1) — a rate of 1
            would divide by zero and silently produce inf/nan.
    """
    if not 0.0 <= dropout_rate < 1.0:
        raise ValueError(f"dropout_rate must be in [0, 1), got {dropout_rate}")
    input_array = np.asarray(input_tensor)
    keep_prob = 1.0 - dropout_rate
    dropout_mask = np.random.binomial(1, keep_prob, size=input_array.shape).astype(
        np.float32
    )
    # Inverted dropout: rescale kept entries so E[output] == input.
    noisy_array = input_array * dropout_mask / keep_prob
    return noisy_array  # Returning array instead of aidge_core.Tensor for simplicity


# Function to run the model with different sample sizes
def run_model(num_samples, dropout_rate=0.5):
    """Estimate the expected dropout output over many noisy samples.

    Repeatedly applies dropout to a fixed 2x5 input, averages the results,
    prints a summary, and reports how far the empirical mean is from the
    clean input (inverted dropout is unbiased, so it should converge).

    Args:
        num_samples: Number of dropout draws to average; must be > 0.
        dropout_rate: Probability of zeroing each element.

    Returns:
        The L2 norm of (clean input - empirical mean output).

    Raises:
        ValueError: If ``num_samples`` is not positive (the original code
            would hit a NameError / divide-by-zero at 0 samples).
    """
    if num_samples <= 0:
        raise ValueError(f"num_samples must be positive, got {num_samples}")

    fixed_input = np.array(
        [[1.0, 2.0, 3.0, 4.0, 5.0], [6.0, 7.0, 8.0, 9.0, 10.0]], dtype=np.float32
    )
    input_tensor = fixed_input

    # NOTE(review): the actual Aidge model is not wired in yet — the noisy
    # input stands in for the model output (identity placeholder).
    # aidge_model = aidge_core.Dropout(probability=dropout_rate)
    # scheduler = aidge_core.SequentialScheduler(aidge_model)

    # Match the input's shape and dtype instead of hard-coding (2, 5) float64;
    # this is also consistent with the second version of this function below.
    output_sum = np.zeros_like(fixed_input)

    for _ in range(num_samples):
        noisy_input = apply_dropout(input_tensor, dropout_rate)
        # scheduler.forward(data=[noisy_input])  # Assuming this is how you run the model

        # Simulating model output as the noisy input (for demonstration purposes)
        output_aidge = noisy_input  # Placeholder for actual model output
        output_sum += output_aidge

    expected_output = output_sum / num_samples
    difference = np.linalg.norm(fixed_input - expected_output)

    print("\nInput tensor shape: {}".format(input_tensor.shape))
    print("Original (Clean) input tensor:\n", input_tensor)
    print("\nExpected output tensor shape: {}".format(expected_output.shape))
    print(
        f"Expected value of outputs after {num_samples} noisy samples:\n",
        expected_output,
    )
    print(
        "Difference between clean input and expected output (scalar value):", difference
    )
    return difference


# Function to calculate required samples using the Chernoff bound
def calculate_required_samples(epsilon, delta):
    """Return the sample count required by the Chernoff bound.

    Computes ``ceil((1 / epsilon**2) * ln(1 / delta))`` — the number of
    samples needed to reach accuracy ``epsilon`` with confidence
    ``1 - delta``.
    """
    bound = np.log(1.0 / delta) / (epsilon * epsilon)
    return int(np.ceil(bound))


# Main execution: sweep accuracy levels and see how the Chernoff bound's
# required sample count grows as epsilon tightens.
delta = 0.01  # confidence level
epsilon_values = np.linspace(0.01, 0.1, 10)  # range of accuracy values
sample_counts = [calculate_required_samples(eps, delta) for eps in epsilon_values]

# Plot the bound using the object-oriented pyplot API.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epsilon_values, sample_counts, marker="o")
ax.set_title("Samples Required by Chernoff Bound as a Function of Accuracy")
ax.set_xlabel("Accuracy (ε)")
ax.set_ylabel("Number of Samples (n)")
ax.set_xscale("linear")
ax.set_yscale("linear")
ax.set_xticks(epsilon_values)
ax.grid(True)
plt.show()

# Empirically validate the bound at a few of the predicted sample counts.
sample_values = [1060, 6623, 26492]
for samples in sample_values:
    print(f"\nRunning model with {samples} samples following the Chernoff Bound\n")
    run_model(num_samples=samples)

Convergence of expected output to clean input with dropout#

[ ]:
# Function to apply dropout
def apply_dropout(input_tensor, dropout_rate=0.5):
    """Zero out elements at random and rescale survivors (inverted dropout).

    Each element survives with probability ``1 - dropout_rate``; surviving
    values are divided by that same keep probability, so the expectation of
    the output matches the input.
    """
    keep_probability = 1 - dropout_rate
    values = np.array(input_tensor)
    mask = np.random.binomial(1, keep_probability, size=values.shape)
    mask = mask.astype(np.float32)
    # Divide by the keep probability so the estimate stays unbiased.
    return values * mask / keep_probability  # Returning array for simplicity


# Function to run the model with different sample sizes
def run_model(num_samples, dropout_rate=0.5):
    """Measure how close the empirical mean of dropout-noised copies of a
    fixed input gets to the clean input.

    Draws ``num_samples`` independent dropout samples of a hard-coded 2x5
    input, averages them, and returns the L2 norm of (clean - mean).
    """
    clean = np.array(
        [[1.0, 2.0, 3.0, 4.0, 5.0], [6.0, 7.0, 8.0, 9.0, 10.0]], dtype=np.float32
    )

    running_total = np.zeros_like(clean)
    for _ in range(num_samples):
        running_total += apply_dropout(clean, dropout_rate)

    mean_output = running_total / num_samples
    # Inverted dropout is unbiased, so this distance shrinks as samples grow.
    return np.linalg.norm(clean - mean_output)


# Main execution to plot convergence: sweep increasing sample counts and
# record the distance between the empirical mean and the clean input.
num_samples_list = [10, 50, 100, 500, 1000, 5000, 10000, 20000]
differences = [run_model(num_samples=count) for count in num_samples_list]

# Log-log plot via the object-oriented pyplot API: a roughly straight
# downward line is the expected Monte-Carlo ~1/sqrt(n) decay.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(num_samples_list, differences, marker="o", label="Convergence Trend")
ax.set_title("Convergence of Expected Output to Clean Input")
ax.set_xlabel("Number of Samples")
ax.set_ylabel("Difference (Norm)")
ax.set_xscale("log")  # Use log scale to better show convergence
ax.set_yscale("log")  # Use log scale for differences
ax.grid(True)
ax.legend()
plt.show()