Data provider tutorial#

Binder

This tutorial demonstrates the usage of databases and data providers to perform an evaluation on a model using Aidge.

Installation and Requirements#

  • Python packages: aidge_core, aidge_onnx, aidge_backend_cpu, aidge_backend_opencv, aidge_model_explorer, numpy

  • Download MNIST database

  • Download MLP onnx model from git-lfs

  • Define the top-1 accuracy metric (model visualization is handled later by aidge_model_explorer)

Note: When running this notebook on Binder, all required packages are pre-installed.

[ ]:
%pip install --force-reinstall aidge-core \
    aidge-onnx \
    aidge-backend-cpu \
    aidge-backend-opencv

Download material:

[ ]:
import aidge_core

# Raw-file endpoint of the repository folder holding this tutorial's assets.
BASE_URL = "https://gitlab.eclipse.org/eclipse/aidge/aidge/-/raw/main/examples/tutorials/101_first_step/"

# The model is stored locally under the same name it has on the server.
file_name = "MLP_MNIST.onnx"

aidge_core.utils.download_file(
    file_path=file_name,
    file_url=BASE_URL + file_name,
)
[ ]:
import gzip
import os
import shutil
import urllib.request

# Directory that receives both the downloaded archives and the raw IDX files.
mnist_dir = 'MNIST_test'
os.makedirs(mnist_dir, exist_ok=True)

BASE_URL = 'https://ossci-datasets.s3.amazonaws.com/mnist/'
files = ['t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']

for file in files:
    url = BASE_URL + file
    file_path = os.path.join(mnist_dir, file)
    # Dropping the ".gz" suffix gives the path of the decompressed file.
    decompressed_file_path = os.path.splitext(file_path)[0]

    if os.path.exists(decompressed_file_path):
        print(f"{file} already exists. Skipping download and decompression.")
        continue

    print("Downloading", file)
    urllib.request.urlretrieve(url, file_path)
    print("Download complete")

    print("Decompressing", file)
    # Stream into a temporary file and rename it only on success, so that an
    # interrupted or corrupt decompression never leaves a partial file that
    # the existence check above would mistake for a finished one.
    partial_path = decompressed_file_path + '.part'
    with gzip.open(file_path, 'rb') as src, open(partial_path, 'wb') as dst:
        shutil.copyfileobj(src, dst)
    os.replace(partial_path, decompressed_file_path)
    print("Decompression complete")
[ ]:
def top1_accuracy(predictions, labels):
    """Return the fraction of samples whose top-scoring class equals the label.

    Args:
        predictions: 2-D array-like of per-class scores, one row per sample.
        labels: Array-like holding one class index per sample. Any shape is
            accepted as long as it contains exactly one value per row of
            ``predictions`` (e.g. ``(N,)`` or ``(N, 1)``).

    Returns:
        The top-1 accuracy in ``[0, 1]``; ``0.0`` when there are no samples.

    Raises:
        ValueError: If the number of labels differs from the number of rows
            in ``predictions``.
    """
    # Local import keeps the function usable regardless of where in the file
    # the module-level numpy import happens to sit.
    import numpy as np

    predictions = np.asarray(predictions)
    # Flatten so that column-shaped labels (N, 1) cannot broadcast against the
    # (N,) predicted classes into an (N, N) comparison and inflate the score.
    labels = np.asarray(labels).reshape(-1)

    total = len(predictions)
    if total == 0:
        # Avoid a 0/0 division (NaN) on an empty batch.
        return 0.0
    if labels.size != total:
        raise ValueError(
            f"expected {total} labels, got {labels.size}"
        )

    predicted_class = predictions.argmax(axis=1)
    correct_pred = (predicted_class == labels).sum()

    return correct_pred / total

Perform an evaluation of the MLP model with Aidge#

  • Import Aidge libraries

  • Import ONNX model

  • Configure the model for inference

  • Create the Database and DataProvider

  • Perform the evaluation

[ ]:
import aidge_backend_cpu
import aidge_backend_opencv
import aidge_model_explorer
import aidge_onnx
import numpy as np
[ ]:
# Load the ONNX file into an Aidge graph.
onnx_path = "./MLP_MNIST.onnx"
model = aidge_onnx.load_onnx(onnx_path)

# Graph transformation applied in place before the model is used
# (presumably strips Flatten nodes -- confirm against the aidge_core docs).
aidge_core.remove_flatten(model)

# Display the resulting graph with the model explorer.
aidge_model_explorer.visualize(model, "MLP_MNIST", embed=True)
[ ]:
# Configure the model: set its datatype to float32 and select the "cpu" backend
model.set_datatype(aidge_core.dtype.float32)
model.set_backend("cpu")

# Define the scheduler; its forward() is what runs inference on the model
scheduler = aidge_core.SequentialScheduler(model)
[ ]:
# MNIST database backed by the files placed in ./MNIST_test (train=False).
val_mnist = aidge_backend_opencv.MNIST(
    dataPath="./MNIST_test",
    train=False,
    load_data_in_memory=False,
)

# Iterable that serves the database in batches of 200 samples.
val_dataprovider = aidge_core.DataProvider(
    val_mnist,
    batch_size=200,
    shuffle=True,
    drop_last=False,
)
[ ]:
# Running totals. Each batch accuracy is weighted by its batch size so that a
# shorter final batch (possible with drop_last=False) does not skew the
# dataset-level accuracy, as a plain mean of batch accuracies would.
correct_total = 0.0
sample_total = 0

# The graph's output operator does not change between batches: look it up once.
output_op = list(model.get_output_nodes())[0].get_operator()

for data_batch, lbl_batch in val_dataprovider:

    data_batch.set_datatype(aidge_core.dtype.float32)
    lbl_batch.set_datatype(aidge_core.dtype.float32)

    # Run inference on the current batch
    scheduler.forward(data=[data_batch])

    # Get output and label in a numpy array
    output_aidge = np.array(output_op.get_output(0))
    lbl = np.array(lbl_batch).flatten()

    # Accumulate the number of correct predictions for this batch
    batch_len = len(lbl)
    correct_total += top1_accuracy(output_aidge, lbl) * batch_len
    sample_total += batch_len

# Guard against an empty data provider to avoid a division by zero.
val_acc = correct_total / sample_total if sample_total else 0.0

print(val_acc)