Quantized model C++ export#

[ ]:
%pip install requests numpy ipywidgets ipycanvas
[ ]:
# Aidge module imports
import aidge_core
import aidge_backend_cpu
import aidge_onnx
import aidge_export_cpp
import aidge_quantization
from aidge_export_cpp.export_utils import (
    cpp_fuse_to_metaops,
    set_nodes_names,
    set_nodes_datatypes,
    exclude_unwanted_producers)

from aidge_core.export_utils import remove_optional_inputs
# For the dataset
from torchvision import transforms, datasets
import numpy as np
[ ]:
# Global variables
USE_CUDA      = False
if USE_CUDA:
    import aidge_backend_cuda
BACKEND       = "cuda" if USE_CUDA else "cpu"
MODEL_NAME    = "lenet"
EXPORT_FOLDER   = f"{MODEL_NAME}_export_int8"

## PTQ Variables
NB_TEST       = 10
NB_CALIB      = 20
NB_BITS       = 8
TARGET_TYPE   = aidge_core.dtype.int32
OPTIM_SIGN    = False
CLIPPING      = aidge_quantization.Clipping.MSE  # alternatively: aidge_quantization.Clipping.MAX
SINGLE_SHIFT  = True
[ ]:
def propagate(model, scheduler, tensor):
    """
    Propagate the given tensor through the model and return the
    output as a NumPy array.
    """
    print(f"Propagate: input backend is '{tensor.backend()}'")
    # Run the inference
    scheduler.forward(True, [tensor])
    # Gather the result from the (single) output node
    output_node = model.get_output_nodes().pop()
    output_tensor = output_node.get_operator().get_output(0).clone()
    output_tensor.set_backend("cpu")
    return np.array(output_tensor)

Download the model#

[ ]:
file_url = "https://huggingface.co/EclipseAidge/LeNet/resolve/main/lenet_mnist.onnx?download=true"
file_path = MODEL_NAME + "_mnist.onnx"
aidge_core.utils.download_file(file_path, file_url)
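
As a quick sanity check (a minimal sketch, assuming the file is written to the current working directory), we can verify that the ONNX file was actually downloaded:

[ ]:
import os

# Make sure the ONNX file exists before trying to load it
assert os.path.isfile(file_path), f"Download failed: {file_path} not found"
print(f"{file_path}: {os.path.getsize(file_path) / 1024:.1f} KiB")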

Create the dataset used to quantize the model#

[ ]:
transform = transforms.ToTensor()
test_set  = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

tensors = []
labels  = []
for index, (in_tensor, label) in enumerate(test_set):
    array = np.reshape(np.array(in_tensor), (1, 1, 28, 28))
    tensor = aidge_core.Tensor(array)
    tensor.set_backend(BACKEND)
    tensor.set_datatype(aidge_core.dtype.float32)
    tensors.append(tensor)
    labels.append(label)
    if index + 1 == max(NB_TEST, NB_CALIB):
        break
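
Before going further, a quick look at what was just built helps catch shape mistakes early (a minimal sanity check; it only uses the tensors and labels created above):

[ ]:
# Quick look at the calibration/test set we just created
print(f"Loaded {len(tensors)} samples ({NB_CALIB} for calibration, {NB_TEST} for testing)")
print("First sample dims:", tensors[0].dims(), "- label:", labels[0])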

Load the model in Aidge and manipulate it#

[ ]:
model = aidge_onnx.load_onnx(file_path, verbose=False)
aidge_core.remove_flatten(model)
aidge_core.fuse_batchnorm(model)
aidge_core.expand_metaops(model)
model.set_datatype(aidge_core.dtype.float32)
model.set_backend(BACKEND)
model.save("imported_model")

Checking accuracy:

[ ]:
scheduler = aidge_core.SequentialScheduler(model)

print('\n EXAMPLE INFERENCES:')
nb_valid = 0
base_values = []
for i in range(NB_TEST):
    output_array = propagate(model, scheduler, tensors[i])
    print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
    base_values.append(np.max(output_array))
    if (labels[i] == np.argmax(output_array)):
        nb_valid += 1
accuracy = nb_valid / NB_TEST
print('\n MODEL ACCURACY = ', accuracy * 100, '%')

Quantize the model to int8#

[ ]:
aidge_quantization.quantize_network(
        network = model,
        nb_bits = NB_BITS,
        calibration_set = tensors[0:NB_CALIB],
        clipping_mode = CLIPPING,
        target_type = TARGET_TYPE,
        no_quant = False,
        optimize_signs = OPTIM_SIGN,
        single_shift = SINGLE_SHIFT,
        use_cuda = USE_CUDA,
        fold_graph = True)
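
To inspect what quantization did to the graph (inserted Quantizer and Cast nodes, folded constants), the transformed model can be dumped the same way the imported model was saved earlier:

[ ]:
# Save the quantized graph for inspection, like "imported_model" above
model.save("quantized_model")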


Once quantization is done, the graph only accepts integer inputs, so the dataset must be rescaled accordingly. With NB_BITS = 8 the scaling factor is 2**(NB_BITS-1) - 1 = 127, which maps the [0, 1] pixel values to the signed 8-bit range [0, 127]. The tensors must also be cast to the same type as TARGET_TYPE.

[ ]:
rescaling = 2**(NB_BITS-1)-1
for i in range(NB_TEST):
    tensors[i].set_backend("cpu")
    array = np.array(tensors[i]) * rescaling
    array = np.round(array).astype(int)
    tensors[i] = aidge_core.Tensor(array)
    tensors[i].set_datatype(TARGET_TYPE)
    tensors[i].set_backend("cpu")
# Set the model back to CPU for the export
model.set_backend("cpu")
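
A quick check that the rescaled inputs are in the expected range (a minimal sanity check using the NumPy view of the first tensor):

[ ]:
arr = np.array(tensors[0])
print("dtype:", arr.dtype, "- min:", arr.min(), "- max:", arr.max())  # expected within [0, 127]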

Each time the graph has been changed, the scheduler has to be reset. Here, quantization has added Quantizer and Cast nodes to the graph.

[ ]:
# get_ordered_inputs() returns (node, input_index) pairs
input_node, _ = model.get_ordered_inputs()[0]
input_node.get_operator().set_input(0, tensors[0])
scheduler.reset_scheduling()

And now we can assess the model's performance after quantization:

[ ]:
print('\n QUANTIZED EXAMPLE INFERENCES:')
nb_valid = 0
post_values = []
for i in range(NB_TEST):
    print(f"QEI: {tensors[i].backend()}")
    output_array = propagate(model, scheduler, tensors[i])
    print(labels[i], ' VS ', np.argmax(output_array), ' -> ', np.max(output_array))
    post_values.append(np.max(output_array))
    if (labels[i] == np.argmax(output_array)):
        nb_valid += 1
quant_accuracy = nb_valid / NB_TEST
print('\n BASE MODEL ACCURACY = ', accuracy * 100, '%')
print('\n QUANTIZED ACCURACY = ', quant_accuracy * 100, '%')
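
Since the float logits were kept in base_values and the quantized ones in post_values, we can also check that quantization scaled the outputs by a roughly constant factor (a minimal sanity check, not part of the export flow):

[ ]:
# Quantized logits should be a roughly constant multiple of the float ones
ratios = np.array(post_values) / np.array(base_values)
print("output scaling ratio: mean =", ratios.mean(), ", std =", ratios.std())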
[ ]:
exclude_unwanted_producers(model)

# Fuse nodes
cpp_fuse_to_metaops(model)
remove_optional_inputs(model)
scheduler.reset_scheduling()
scheduler.generate_scheduling() # Scheduler needs to be generated as it has just been reset
set_nodes_names(scheduler)
[ ]:
output_array = propagate(model, scheduler, tensors[0])

print("### Exported Sample ###")
print("Aidge prediction :", np.argmax(output_array), "(" + str(np.max(output_array)) + ")")
print("Label :", labels[0])
set_nodes_datatypes(model)
[ ]:
aidge_export_cpp.export(EXPORT_FOLDER,
                        model,
                        scheduler,
                        labels = aidge_core.Tensor(labels[0]),
                        dev_mode = False,
                        aidge_cmp = False)
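
Before compiling, it can be useful to list what the export generated (a minimal sketch; the exact layout depends on the aidge_export_cpp version):

[ ]:
import os

# Walk the export folder to see the generated sources, headers and Makefile
for root, _, files in os.walk(EXPORT_FOLDER):
    for f in sorted(files):
        print(os.path.join(root, f))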
[ ]:
from subprocess import CalledProcessError

print("\n### Compiling the export ###")
try:
    for std_line in aidge_core.utils.run_command(["make"], cwd=EXPORT_FOLDER):
        print(std_line, end="")
except CalledProcessError as e:
    raise RuntimeError("An error occurred, failed to build export.") from e
print("\n### Running the export ###")
try:
    for std_line in aidge_core.utils.run_command(["./bin/run_export"], cwd=EXPORT_FOLDER):
        print(std_line, end="")
except CalledProcessError as e:
    raise RuntimeError("An error occurred, failed to run export.") from e