Data provider tutorial
This tutorial demonstrates the usage of databases and data providers to perform an evaluation on a model using Aidge.
Installation and Requirements
Python packages: aidge_core, aidge_onnx, aidge_backend_cpu, aidge_backend_opencv, aidge_model_explorer, numpy
Download MNIST database
Download MLP onnx model from git-lfs
Define the top-1 accuracy metric (model visualization is done later with aidge_model_explorer)
Note: When running this notebook on Binder, all required packages are pre-installed.
[ ]:
# Install the Aidge packages used by this notebook (IPython magic: runs pip in the kernel's environment).
# NOTE(review): aidge_model_explorer and numpy are imported further down but are not listed here;
# presumably they are pre-installed or pulled in as dependencies -- confirm, or add them to this command.
%pip install --force-reinstall aidge-core \
aidge-onnx \
aidge-backend-cpu \
aidge-backend-opencv
Download material:
[ ]:
import aidge_core

# Remote folder that hosts the tutorial material.
BASE_URL = "https://gitlab.eclipse.org/eclipse/aidge/aidge/-/raw/main/examples/tutorials/101_first_step/"

# Fetch the ONNX model and store it under the same (relative) file name.
file_name = "MLP_MNIST.onnx"
aidge_core.utils.download_file(
    file_url=BASE_URL + file_name,
    file_path=file_name,
)
[ ]:
import gzip
import os
import shutil
import urllib.request

# Local folder that receives the decompressed MNIST test files.
mnist_dir = 'MNIST_test'
os.makedirs(mnist_dir, exist_ok=True)

BASE_URL = 'https://ossci-datasets.s3.amazonaws.com/mnist/'
files = ['t10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz']

for file in files:
    url = BASE_URL + file
    file_path = os.path.join(mnist_dir, file)
    # Dropping the ".gz" extension gives the name of the decompressed file.
    decompressed_file_path = os.path.splitext(file_path)[0]

    if os.path.exists(decompressed_file_path):
        print(f"{file} already exists. Skipping download and decompression.")
        continue

    print("Downloading", file)
    urllib.request.urlretrieve(url, file_path)
    print("Download complete")

    print("Decompressing", file)
    # Decompress into a temporary file and rename it once complete, so an
    # interrupted run never leaves a truncated file that a later run would
    # mistake for a finished one.
    partial_path = decompressed_file_path + ".part"
    # Context managers close both handles even on error; copyfileobj streams
    # the data in chunks instead of loading the whole archive in memory.
    with gzip.open(file_path, 'rb') as src, open(partial_path, 'wb') as dst:
        shutil.copyfileobj(src, dst)
    os.replace(partial_path, decompressed_file_path)
    print("Decompression complete")
[ ]:
def top1_accuracy(predictions, labels):
    """Return the fraction of samples whose best-scoring class matches its label.

    Args:
        predictions: Array-like of shape (n_samples, n_classes) holding one
            score per class for every sample.
        labels: Array-like holding one class index per sample. Any shape
            with n_samples values is accepted (e.g. (n_samples,) or
            (n_samples, 1)); it is flattened before the comparison.

    Returns:
        Number of correct top-1 predictions divided by n_samples.

    Raises:
        ValueError: If the number of labels differs from the number of
            prediction rows.
    """
    # Local import keeps the function usable before the notebook's own
    # numpy import cell has been executed.
    import numpy as np

    predictions = np.asarray(predictions)
    # Flatten the labels: comparing a (n,) vector with a (n, 1) column would
    # broadcast to an (n, n) matrix and silently inflate the score.
    labels = np.asarray(labels).reshape(-1)

    total = len(predictions)
    if labels.size != total:
        raise ValueError(
            f"Expected {total} labels (one per prediction), got {labels.size}."
        )

    predicted_class = predictions.argmax(axis=1)
    correct_pred = (predicted_class == labels).sum()
    return correct_pred / total
Perform an evaluation of the MLP model on Aidge
Import Aidge libraries
Import ONNX model
Configure the model for inference
Create the Database and DataProvider
Perform the evaluation
[ ]:
import aidge_backend_cpu
import aidge_backend_opencv
import aidge_model_explorer
import aidge_onnx
import numpy as np
[ ]:
# Load the ONNX file downloaded earlier into an Aidge model object.
model = aidge_onnx.load_onnx("./MLP_MNIST.onnx")
# Presumably removes the Flatten nodes from the graph in place (going by the helper's name) -- confirm against the Aidge docs.
aidge_core.remove_flatten(model)
# Display the graph under the title "MLP_MNIST"; embed=True presumably renders it inline in the notebook.
aidge_model_explorer.visualize(model, "MLP_MNIST", embed=True)
[ ]:
# Configure the model: float32 tensors, executed by the "cpu" backend
# (the backend is presumably the one registered by the aidge_backend_cpu import -- confirm).
model.set_datatype(aidge_core.dtype.float32)
model.set_backend("cpu")
# Define the scheduler that will drive the forward passes over the model
scheduler = aidge_core.SequentialScheduler(model)
[ ]:
# Database backed by the files stored in ./MNIST_test.
# train=False presumably selects the t10k (test) split -- confirm against the Aidge docs.
val_mnist = aidge_backend_opencv.MNIST(
    dataPath="./MNIST_test",
    train=False,
    load_data_in_memory=False,
)

# Provider that serves the database in batches of 200 items; shuffling is
# enabled and the last batch is kept (drop_last=False).
val_dataprovider = aidge_core.DataProvider(
    val_mnist,
    batch_size=200,
    shuffle=True,
    drop_last=False,
)
[ ]:
# Evaluate the model on every batch and report the top-1 accuracy over all samples.

# The output operator of the graph is the same for every batch: look it up once.
output_op = list(model.get_output_nodes())[0].get_operator()

correct_total = 0.0  # running number of correctly classified samples
sample_total = 0     # running number of evaluated samples
for data_batch, lbl_batch in val_dataprovider:
    data_batch.set_datatype(aidge_core.dtype.float32)
    lbl_batch.set_datatype(aidge_core.dtype.float32)
    # Run inference !
    scheduler.forward(data=[data_batch])
    # Get output and label in a numpy array
    output_aidge = np.array(output_op.get_output(0))
    lbl = np.array(lbl_batch)
    # Weight each batch accuracy by its size: a plain mean of per-batch
    # accuracies is biased when the last batch is smaller (drop_last=False).
    batch_samples = len(output_aidge)
    correct_total += top1_accuracy(output_aidge, lbl.flatten()) * batch_samples
    sample_total += batch_samples

val_acc = correct_total / sample_total
print(val_acc)