Database MNIST#
This tutorial demonstrates how to use databases and data providers to evaluate a model with Aidge.
Installation and Requirements#
Python packages: aidge_core, aidge_backend_cpu, aidge_backend_opencv, aidge_onnx
Download MNIST database
Download MLP onnx model from git-lfs
Define model visualization function and top-1 accuracy metric
[1]:
import os
import requests
def download_material(path: str) -> None:
    """Download a tutorial asset into the working directory if it is missing.

    The file is fetched over HTTP from the ``101_first_step`` tutorial folder
    of the Aidge GitLab repository and written to ``path``. The outcome is
    reported on stdout; a non-200 response is reported, not raised.

    Args:
        path: File name relative to the tutorial folder; also used as the
            local destination path.
    """
    # Nothing to do when the file is already available locally.
    if os.path.isfile(path):
        return

    # requests waits forever by default; a timeout keeps the notebook from
    # hanging on a stalled connection.
    response = requests.get(
        "https://gitlab.eclipse.org/eclipse/aidge/aidge/-/raw/dev/examples/tutorials/101_first_step/"+path+"?ref_type=heads",
        timeout=30,
    )
    if response.status_code == 200:
        with open(path, 'wb') as f:
            f.write(response.content)
        print("File downloaded successfully.")
    else:
        print("Failed to download file. Status code:", response.status_code)
# Download the MLP ONNX model file used in the rest of this tutorial
download_material("MLP_MNIST.onnx")
File downloaded successfully.
[2]:
import os
import urllib.request
import gzip
import shutil
# Download the two MNIST "t10k" archives (images and labels) into MNIST_test
# and decompress them next to the downloaded .gz files.
mnist_dir = 'MNIST_test'
os.makedirs(mnist_dir, exist_ok=True)
base_url = 'https://ossci-datasets.s3.amazonaws.com/mnist/'
files = ['t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']
for file in files:
    url = base_url + file
    file_path = os.path.join(mnist_dir, file)
    # Dropping the ".gz" extension gives the name of the decompressed file.
    decompressed_file_path = os.path.splitext(file_path)[0]
    if not os.path.exists(decompressed_file_path):
        print("Downloading", file)
        urllib.request.urlretrieve(url, file_path)
        print("Download complete")
        print("Decompressing", file)
        # The context managers close both handles deterministically, and
        # copyfileobj streams the data instead of loading it all in memory.
        with gzip.open(file_path, 'rb') as f_in, open(decompressed_file_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        print("Decompression complete")
    else:
        print(f"{file} already exists. Skipping download and decompression.")
Downloading t10k-images-idx3-ubyte.gz
Download complete
Decompressing t10k-images-idx3-ubyte.gz
Decompression complete
Downloading t10k-labels-idx1-ubyte.gz
Download complete
Decompressing t10k-labels-idx1-ubyte.gz
Decompression complete
[3]:
import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt
def visualize_mmd(path_to_mmd):
    """Display a Mermaid (.mmd) graph file as an image via mermaid.ink.

    The file content is base64-encoded into a mermaid.ink image URL, which is
    then shown with IPython's ``display``.

    Args:
        path_to_mmd: Path to the Mermaid text file to display.
    """
    with open(path_to_mmd, "r", encoding="utf-8") as file_mmd:
        graph_mmd = file_mmd.read()
    # The encoded diagram becomes part of the URL path, so the URL-safe
    # alphabet is needed: standard base64 may emit '+' and '/', and '/' would
    # be read as a path separator. UTF-8 also avoids a UnicodeEncodeError on
    # non-ASCII node labels.
    graphbytes = graph_mmd.encode("utf-8")
    base64_string = base64.urlsafe_b64encode(graphbytes).decode("ascii")
    display(Image(url=f"https://mermaid.ink/img/{base64_string}"))
[4]:
def top1_accuracy(predictions, labels):
    """Return the fraction of rows whose highest-scoring column matches the label.

    Args:
        predictions: Array of scores with one row per sample; the predicted
            class is the index of the largest value along axis 1.
        labels: Expected class index for each row of ``predictions``.

    Returns:
        Number of matching rows divided by ``len(predictions)``.
    """
    best_guess = predictions.argmax(axis=1)
    n_hits = (best_guess == labels).sum()
    return n_hits / len(predictions)
Perform an evaluation of the LeNet-like model on Aidge#
Import Aidge libraries
Import ONNX model
Configure the model for inference
Create the Database and DataProvider
Perform the evaluation
[5]:
import aidge_core
import aidge_backend_opencv
import aidge_backend_cpu
import aidge_onnx
import numpy as np
[6]:
# Import the ONNX model downloaded earlier
model = aidge_onnx.load_onnx("./MLP_MNIST.onnx")
# The import warning reports Flatten as loaded through a generic operator;
# presumably remove_flatten strips that node from the graph -- TODO confirm
aidge_core.remove_flatten(model)
# Save the graph; presumably this writes "mySupportedModel.mmd", the file
# rendered on the next line -- TODO confirm
model.save("mySupportedModel")
visualize_mmd("mySupportedModel.mmd")
Warning: an error occured when trying to load node 'Flatten' of type flatten.
Loading node using a generic operator.
Please report this issue at https://gitlab.eclipse.org/eclipse/aidge/aidge_onnx
by providing your ONNX model and the following error:
ONNX_NODE_CONVERTER_ returned: module 'aidge_core' has no attribute 'Flatten'
- Flatten (Flatten | GenericOperator)
- axis : 1
[7]:
# Configure the model: float32 data type, "cpu" backend
model.set_datatype(aidge_core.dtype.float32)
model.set_backend("cpu")
# Define the scheduler; its forward() is what runs inference in the evaluation loop
scheduler = aidge_core.SequentialScheduler(model)
[8]:
# MNIST database read from the directory populated by the download cell;
# train=False presumably selects the test split -- TODO confirm
val_mnist = aidge_backend_opencv.MNIST(dataPath="./MNIST_test",
train=False,
load_data_in_memory=False)
# Data provider iterated by the evaluation loop, configured with batch_size=200
val_dataprovider = aidge_core.DataProvider(val_mnist,
batch_size=200,
shuffle=True,
drop_last=False)
[9]:
# Evaluate the model on the whole data provider and print the top-1 accuracy.
# Each batch accuracy is weighted by its batch size so the result stays exact
# even when the last batch is smaller (drop_last=False allows that).
val_acc = 0
n_samples = 0
for data_batch, lbl_batch in val_dataprovider:
    data_batch.set_datatype(aidge_core.dtype.float32)
    lbl_batch.set_datatype(aidge_core.dtype.float32)
    # Run inference !
    scheduler.forward(data=[data_batch])
    # Get output and label in a numpy array
    output_aidge = np.array(list(model.get_output_nodes())[0].get_operator().get_output(0))
    lbl = np.array(lbl_batch)
    # Compute the top-1 accuracy of this batch and accumulate it, weighted by
    # the number of predictions it covers
    batch_len = len(output_aidge)
    val_acc += top1_accuracy(output_aidge, lbl.flatten()) * batch_len
    n_samples += batch_len
# Sample-weighted mean over every prediction seen
val_acc = val_acc / n_samples
print(val_acc)
0.9634