Database MNIST#

This tutorial demonstrates how to use databases and data providers to evaluate a model with Aidge.

Installation and Requirements#

Python packages: aidge_core, aidge_backend_cpu, aidge_backend_opencv, aidge_onnx, numpy, requests
Download MNIST database
Download the MLP ONNX model from git-lfs
Define model visualization function and top-1 accuracy metric

[1]:

import os
import requests

def download_material(path: str) -> None:
    """Download a tutorial asset next to the notebook if it is not there yet.

    ``path`` is used twice: it is appended to the tutorial's GitLab raw URL to
    build the download address, and it is the local file the content is
    written to. When a file already exists at ``path`` nothing is downloaded.

    A message is printed on success and on a non-200 HTTP status; the latter
    is reported only and does not raise.

    Raises:
        requests.exceptions.RequestException: on network failure, including
            when the server does not answer within the timeout.
    """
    if os.path.isfile(path):
        return

    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.get(
        "https://gitlab.eclipse.org/eclipse/aidge/aidge/-/raw/dev/examples/tutorials/101_first_step/"+path+"?ref_type=heads",
        timeout=30,
    )
    if response.status_code == 200:
        with open(path, 'wb') as f:
            f.write(response.content)
        print("File downloaded successfully.")
    else:
        print("Failed to download file. Status code:", response.status_code)

# Fetch the ONNX model file used by this tutorial into the current directory
# (download_material skips the download when the file is already present)
download_material("MLP_MNIST.onnx")

File downloaded successfully.

[2]:

import os
import urllib.request
import gzip
import shutil

# Directory that receives the MNIST test-set files.
mnist_dir = 'MNIST_test'
os.makedirs(mnist_dir, exist_ok=True)

base_url = 'https://ossci-datasets.s3.amazonaws.com/mnist/'
files = ['t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']

for file in files:
    url = base_url + file
    file_path = os.path.join(mnist_dir, file)
    # Dropping the ".gz" extension gives the name of the decompressed file.
    decompressed_file_path = os.path.splitext(file_path)[0]
    if not os.path.exists(decompressed_file_path):
        print("Downloading", file)
        urllib.request.urlretrieve(url, file_path)
        print("Download complete")
        print("Decompressing", file)
        # Decompress into a temporary file and rename it once complete, so an
        # interrupted run never leaves a truncated file that the existence
        # check above would mistake for a finished one.
        partial_file_path = decompressed_file_path + '.part'
        # The context managers close both handles; copyfileobj streams the
        # data in chunks instead of loading the whole archive in memory.
        with gzip.open(file_path, 'rb') as f_in, open(partial_file_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.replace(partial_file_path, decompressed_file_path)
        print("Decompression complete")
    else:
        print(f"{file} already exists. Skipping download and decompression.")

Downloading t10k-images-idx3-ubyte.gz
Download complete
Decompressing t10k-images-idx3-ubyte.gz
Decompression complete
Downloading t10k-labels-idx1-ubyte.gz
Download complete
Decompressing t10k-labels-idx1-ubyte.gz
Decompression complete

[3]:

import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt

def visualize_mmd(path_to_mmd):
    """Display a Mermaid graph file as an image rendered by mermaid.ink.

    The text of the file at ``path_to_mmd`` is base64-encoded and embedded in
    a ``https://mermaid.ink/img/...`` URL, which is then shown in the
    notebook through IPython's ``display(Image(url=...))``.

    Args:
        path_to_mmd: Path of the Mermaid (``.mmd``) text file to render.
    """
    with open(path_to_mmd, "r") as file_mmd:
        graph_mmd = file_mmd.read()

    # UTF-8 so that non-ASCII characters in the graph do not raise.
    graphbytes = graph_mmd.encode("utf-8")
    # URL-safe alphabet: standard base64 may emit "+" and "/", and a "/"
    # inside the payload would split the URL path.
    base64_bytes = base64.urlsafe_b64encode(graphbytes)
    base64_string = base64_bytes.decode("ascii")
    display(Image(url=f"https://mermaid.ink/img/{base64_string}"))

[4]:

def top1_accuracy(predictions, labels):
    """Return the fraction of rows whose highest-scoring class matches its label.

    Args:
        predictions: Array-like exposing ``argmax(axis=1)`` and ``len()``;
            the arg-max along axis 1 is taken as the predicted class of
            each row.
        labels: Values compared element-wise with the predicted classes.

    Returns:
        Number of matching predictions divided by ``len(predictions)``.
    """
    n_hits = (predictions.argmax(axis=1) == labels).sum()
    return n_hits / len(predictions)

Perform an evaluation of the LeNet-like model on Aidge#

Import Aidge libraries
Import ONNX model
Configure the model for inference
Create the Database and DataProvider
Perform the evaluation

[5]:

import aidge_core
import aidge_backend_opencv
import aidge_backend_cpu
import aidge_onnx
import numpy as np

[6]:

# Load the ONNX model file fetched earlier in this tutorial
model = aidge_onnx.load_onnx("./MLP_MNIST.onnx")
# NOTE(review): presumably removes the Flatten nodes from the graph - confirm
# against the aidge_core documentation
aidge_core.remove_flatten(model)
# Save the graph; the call below reads "mySupportedModel.mmd", so save() is
# expected to produce that Mermaid file
model.save("mySupportedModel")
visualize_mmd("mySupportedModel.mmd")

[7]:

# Configure the model: float32 data type, "cpu" backend
model.set_datatype(aidge_core.dtype.float32)
model.set_backend("cpu")

# Define the scheduler; it is used in the evaluation loop to run the forward pass
scheduler = aidge_core.SequentialScheduler(model)

[8]:

# MNIST database read from the directory populated by the download cell;
# train=False presumably selects the test split (the t10k files) - confirm
val_mnist = aidge_backend_opencv.MNIST(dataPath="./MNIST_test",
                                       train=False,
                                       load_data_in_memory=False)

# Data provider iterated by the evaluation loop, configured with batches of
# 200 items, shuffling enabled and drop_last disabled
val_dataprovider = aidge_core.DataProvider(val_mnist,
                                           batch_size=200,
                                           shuffle=True,
                                           drop_last=False)

[9]:

# Evaluate the model over the whole data provider.
# Correct predictions and samples are counted across batches instead of
# averaging per-batch accuracies: the provider is created with
# drop_last=False, so the last batch may be smaller than the others and a
# plain mean of batch accuracies would give it too much weight.
n_correct = 0
n_samples = 0

for data_batch, lbl_batch in val_dataprovider:

    data_batch.set_datatype(aidge_core.dtype.float32)
    lbl_batch.set_datatype(aidge_core.dtype.float32)

    # Run inference !
    scheduler.forward(data=[data_batch])

    # Get output and label in a numpy array
    output_aidge = np.array(list(model.get_output_nodes())[0].get_operator().get_output(0))
    lbl = np.array(lbl_batch)

    # top1_accuracy returns the fraction of correct predictions of the batch;
    # scale it by the batch size (the same length it divides by) and round to
    # recover the exact integer count of correct predictions
    batch_size = len(output_aidge)
    n_correct += int(round(top1_accuracy(output_aidge, lbl.flatten()) * batch_size))
    n_samples += batch_size

# Top-1 accuracy over all evaluated samples
val_acc = n_correct / n_samples

print(val_acc)

0.9634