Database MNIST#

This tutorial demonstrates how to use databases and data providers to evaluate a model with Aidge.

Installation and Requirements#

  • Python packages: aidge_core, aidge_backend_cpu, aidge_backend_opencv, aidge_onnx, numpy, requests, IPython, matplotlib

  • Download MNIST database

  • Download MLP onnx model from git-lfs

  • Define model visualization function and top-1 accuracy metric

[1]:
import os
import requests

def download_material(path: str) -> None:
    """Download a tutorial file from the Aidge GitLab repository if it is missing.

    Nothing is downloaded when ``path`` already exists locally. A response
    whose status code is not 200 is reported on stdout and no file is written.

    Args:
        path: File name appended to the ``101_first_step`` tutorial URL; it is
            also used as the local destination path.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    if os.path.isfile(path):
        return

    # Always pass a timeout: without one, requests waits indefinitely on a
    # stalled connection and the notebook cell never returns.
    response = requests.get(
        "https://gitlab.eclipse.org/eclipse/aidge/aidge/-/raw/dev/examples/tutorials/101_first_step/"+path+"?ref_type=heads",
        timeout=60,
    )
    if response.status_code == 200:
        with open(path, 'wb') as f:
            f.write(response.content)
        print("File downloaded successfully.")
    else:
        print("Failed to download file. Status code:", response.status_code)

# Download the ONNX model file used by this tutorial (download_material skips
# the request when the file is already present locally)
download_material("MLP_MNIST.onnx")

File downloaded successfully.
[2]:
import os
import urllib.request
import gzip
import shutil

# Local directory receiving the MNIST test files; the same directory is later
# given as dataPath to the MNIST database.
mnist_dir = 'MNIST_test'
os.makedirs(mnist_dir, exist_ok=True)

base_url = 'https://ossci-datasets.s3.amazonaws.com/mnist/'
files = ['t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']

for file in files:
    url = base_url + file
    file_path = os.path.join(mnist_dir, file)
    # Dropping the ".gz" extension gives the name of the decompressed file.
    decompressed_file_path = os.path.splitext(file_path)[0]
    if not os.path.exists(decompressed_file_path):
        print("Downloading", file)
        urllib.request.urlretrieve(url, file_path)
        print("Download complete")
        print("Decompressing", file)
        # Context managers guarantee both handles are closed (and the output
        # flushed); copyfileobj streams the data in chunks instead of loading
        # the whole archive in memory.
        with gzip.open(file_path, 'rb') as compressed, \
                open(decompressed_file_path, 'wb') as decompressed:
            shutil.copyfileobj(compressed, decompressed)
        print("Decompression complete")
    else:
        print(f"{file} already exists. Skipping download and decompression.")
Downloading t10k-images-idx3-ubyte.gz
Download complete
Decompressing t10k-images-idx3-ubyte.gz
Decompression complete
Downloading t10k-labels-idx1-ubyte.gz
Download complete
Decompressing t10k-labels-idx1-ubyte.gz
Decompression complete
[3]:

import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt


def visualize_mmd(path_to_mmd):
    """Display a Mermaid graph description file as an image in the notebook.

    The file content is base64-encoded and appended to the
    ``https://mermaid.ink/img/`` URL, which is then shown with IPython.

    Args:
        path_to_mmd: Path to the Mermaid text file to render.
    """
    with open(path_to_mmd, "r", encoding="utf-8") as file_mmd:
        graph_mmd = file_mmd.read()

    # UTF-8 keeps non-ASCII characters in the graph from raising on encode.
    graphbytes = graph_mmd.encode("utf-8")
    # The encoded graph becomes part of the URL path, so use the URL-safe
    # alphabet: standard base64 may emit "/" and "+", which corrupt the URL.
    base64_bytes = base64.urlsafe_b64encode(graphbytes)
    base64_string = base64_bytes.decode("ascii")
    display(Image(url=f"https://mermaid.ink/img/{base64_string}"))
[4]:
def top1_accuracy(predictions, labels):
    """Return the fraction of samples whose best-scoring class equals its label.

    Args:
        predictions: Array-like of shape (n_samples, n_classes); the predicted
            class of a sample is the index of its largest value along axis 1.
        labels: Array-like holding one class index per sample. Any shape is
            accepted, e.g. (n_samples,) or (n_samples, 1); it is flattened
            before the comparison.

    Returns:
        The ratio of correct predictions, or 0.0 when there are no samples.
    """
    import numpy as np  # local import so the helper does not depend on cell order

    total = len(predictions)
    if total == 0:
        # Avoid the 0 / 0 division (NaN plus a RuntimeWarning) on empty input.
        return 0.0

    predicted_class = np.asarray(predictions).argmax(axis=1)
    # Flatten the labels: a column vector would otherwise broadcast against the
    # predicted classes into an (n, n) matrix and inflate the count.
    expected_class = np.asarray(labels).reshape(-1)
    correct_pred = (predicted_class == expected_class).sum()

    accuracy = correct_pred / total
    return accuracy

Perform an evaluation of the MLP model with Aidge#

  • Import Aidge libraries

  • Import ONNX model

  • Configure the model for inference

  • Create the Database and DataProvider

  • Perform the evaluation

[5]:
import aidge_core
import aidge_backend_opencv
import aidge_backend_cpu
import aidge_onnx
import numpy as np
[6]:
# Import the ONNX file downloaded at the beginning of the tutorial
model = aidge_onnx.load_onnx("./MLP_MNIST.onnx")
# NOTE(review): presumably removes the Flatten node(s) from the graph; the
# recorded output of this cell shows the importer loading 'Flatten' as a
# GenericOperator — confirm against the aidge_core documentation
aidge_core.remove_flatten(model)
# NOTE(review): presumably writes "mySupportedModel.mmd", the file rendered by
# the next statement — confirm
model.save("mySupportedModel")
visualize_mmd("mySupportedModel.mmd")
Warning: an error occured when trying to load node 'Flatten' of type flatten.
Loading node using a generic operator.
Please report this issue at https://gitlab.eclipse.org/eclipse/aidge/aidge_onnx
by providing your ONNX model and the following error:
ONNX_NODE_CONVERTER_ returned: module 'aidge_core' has no attribute 'Flatten'

- Flatten (Flatten | GenericOperator)
        - axis : 1
[7]:
# Configure the model: float32 data type and the "cpu" backend
model.set_datatype(aidge_core.dtype.float32)
model.set_backend("cpu")

# Define the scheduler: its forward() method is called once per batch in the
# evaluation loop
scheduler = aidge_core.SequentialScheduler(model)
[8]:
# MNIST database built from the files stored in ./MNIST_test.
# NOTE(review): train=False presumably selects the test split (the t10k-* files
# downloaded earlier) and load_data_in_memory=False presumably defers loading
# the samples — confirm against the aidge_backend_opencv documentation
val_mnist = aidge_backend_opencv.MNIST(dataPath="./MNIST_test",
                                       train=False,
                                       load_data_in_memory=False)

# Data provider iterated by the evaluation loop; each item unpacks into a
# (data batch, label batch) pair.
# NOTE(review): drop_last=False presumably keeps a final batch smaller than
# batch_size instead of discarding it — confirm
val_dataprovider = aidge_core.DataProvider(val_mnist,
                                           batch_size=200,
                                           shuffle=True,
                                           drop_last=False)

[9]:
# Evaluate the model on the whole validation set.
# Each batch accuracy is weighted by the number of samples in the batch, so the
# result stays exact when the last batch is smaller than the others (the data
# provider is created with drop_last=False). A plain mean of per-batch
# accuracies would give such a batch too much weight.
val_acc = 0
num_samples = 0

for data_batch, lbl_batch in val_dataprovider:

    data_batch.set_datatype(aidge_core.dtype.float32)
    lbl_batch.set_datatype(aidge_core.dtype.float32)

    # Run inference !
    scheduler.forward(data=[data_batch])

    # Get output and label in a numpy array
    output_aidge = np.array(list(model.get_output_nodes())[0].get_operator().get_output(0))
    lbl = np.array(lbl_batch)

    # Compute the top-1 accuracy of the batch and weight it by the batch size
    batch_len = len(output_aidge)
    val_acc += top1_accuracy(output_aidge, lbl.flatten()) * batch_len
    num_samples += batch_len

# Guard against an empty data provider (no sample seen)
val_acc = val_acc / num_samples if num_samples else 0.0

print(val_acc)


0.9634