"""basic_example ******************** Basic MLTK model example - Source code: `basic_example.py `_ This provides a basic example of how to create a `model specification `_. It is based off the `Simple MNIST convnet `_ Keras example. It is designed to work with the `Add an Existing Script to MLTK `_ tutorial. Commands -------------- .. code-block:: shell # Do a "dry run" test training of the model mltk train basic_example-test # Train the model mltk train basic_example # Evaluate the trained model .tflite model mltk evaluate basic_example --tflite # Profile the model in the MVP hardware accelerator simulator mltk profile basic_example --accelerator MVP # Profile the model on a physical development board mltk profile basic_example --accelerator MVP --device # Directly invoke the model script python basic_example.py Model Specification --------------------- .. literalinclude:: ../../../../../../../mltk/models/examples/basic_example.py :language: python :lines: 44- """ from typing import Tuple import numpy as np import tensorflow as tf from mltk import core as mltk_core # For this example we use the MNIST dataset. # NOTE: The following uses the MLTK mnist dataset, # but we could have also used: tf.keras.datasets.mnist from mltk.datasets.image import mnist ########################################################################## # Prepare the model parameters classes = mnist.CLASSES num_classes = len(classes) input_shape = (28, 28, 1) batch_size = 128 epochs = 15 validation_split = 0.1 ########################################################################## # Prepare the dataset def my_dataset_loader( subset:str, test:bool, **kwargs ) -> Tuple[np.ndarray, np.ndarray]: """Load the dataset subset This is called automatically by the MLTK before training or evaluation. Args: subset: The dataset subset to return: 'training' or 'evaluation' test: This is optional, it is used when invoking a training "dryrun", e.g.: mltk train basic_example-test If this is true, then only return a small portion of the dataset for testing purposes Return: A tuple (x,y) for the subset """ # Load the data and split it between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() # Scale images to the [0, 1] range x_train = x_train.astype("float32") / 255 x_test = x_test.astype("float32") / 255 # Make sure images have shape (28, 28, 1) x_train = np.expand_dims(x_train, -1) x_test = np.expand_dims(x_test, -1) # If we're just testing, then truncate the dataset if test: x_test = x_test[:64] x_train = x_train[:64] y_train = y_train[:64] y_test = y_test[:64] # This is optional, but useful for automatically generating a summary of the dataset if subset == 'training': my_model.class_counts['training'] = { my_model.classes[class_id]: count for (class_id, count) in enumerate(np.bincount(y_train)) } else: my_model.class_counts['evaluation'] = { my_model.classes[class_id]: count for (class_id, count) in enumerate(np.bincount(y_test)) } # Convert class vectors to binary class matrices y_train = tf.keras.utils.to_categorical(y_train, num_classes) y_test = tf.keras.utils.to_categorical(y_test, num_classes) if subset == 'training': return x_train, y_train else: return x_test, y_test ########################################################################## # Build the model def my_model_builder(my_model: mltk_core.MltkModel) -> tf.keras.Model: """Build the Keras model This is called by the MLTK just before training starts. Arguments: my_model: The MltkModel instance Returns: Compiled Keras model instance """ model = tf.keras.Sequential([ tf.keras.Input(shape=input_shape), tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), tf.keras.layers.Flatten(), tf.keras.layers.Dropout(0.5), tf.keras.layers.Dense(num_classes, activation="softmax"), ]) model.compile( loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"] ) return model ########################################################################## # Create the MltkModel instance # and set the various properties # @mltk_model class MyModel( mltk_core.MltkModel, # We must inherit the MltkModel class mltk_core.TrainMixin, # We also inherit the TrainMixin since we want to train this model mltk_core.DatasetMixin, # We also need the DatasetMixin mixin to provide the relevant dataset properties mltk_core.EvaluateClassifierMixin, # While not required, also inherit EvaluateClassifierMixin to help will generating evaluation for our classification model ): pass my_model = MyModel() # These properties are optional # but a useful for tracking the generated .tflite my_model.version = 1 my_model.description = 'Basic model specification example' my_model.classes = classes my_model.class_weights = 'balanced' # Automatically generate balanced class weights for training # Required: Set the model build function my_model.build_model_function = my_model_builder # Set the other model properties # These values are passed directly to the model.fit() API # https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit my_model.batch_size = batch_size my_model.epochs = epochs my_model.validation_split = validation_split # NOTE: All the other fit() arguments may also be set in the model, e.g.: # my_model.x = x_train # my_model.y = y_train # my_model.step_per_epoch = 60 # Set the dataset my_model.dataset = my_dataset_loader # NOTE: Since my_dataset_loader() returns the x,y # We do not need to manually set the my_model.x, my_model.y properties. # NOTE: You can also add the various KerasCallbacks # https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/ my_model.train_callbacks = [ tf.keras.callbacks.TerminateOnNaN() ] ########################################################################## # Specify the .tflite conversion parameters # This is used to convert the float32 model to int8 model # that can run on the embedded device. def my_representative_dataset_generator(): """A representative dataset is required generate the .tflite In this example we just take the first 100 samples from the validation set. For more details, see: https://www.tensorflow.org/lite/performance/post_training_integer_quant#convert_using_integer-only_quantization NOTE: Quantization is automatically done at the end of training. It may also be invoked with: mltk train basic_example """ for input_value in tf.data.Dataset.from_tensor_slices(my_model.x).batch(1).take(100): yield [input_value] my_model.tflite_converter['inference_input_type'] = tf.float32 my_model.tflite_converter['inference_output_type'] = tf.float32 my_model.tflite_converter['representative_dataset'] = my_representative_dataset_generator ########################################################################################## # (Optional) Configure model parameters # # While not required, user-defined parameters may be embedded into the .tflite model file. # These parameters may then be read by the embedded device at runtime. # # This is useful for syncing data preprocessing parameters between the model training # script and embedded device. # In my_dataset_loader() we scaled the image data by 1/255. # This same scaling must also happen on the embedded device. # Here, we're embedding the scaling value as "metadata" into the generated .tflite. # At runtime, the embedded device should read this value from the .tflite # and use it accordingly. my_model.model_parameters['samplewise_norm.rescale'] = 1/255. # Most standard Python data types may be embedded # See: https://siliconlabs.github.io/mltk/docs/guides/model_parameters.html my_model.model_parameters['my_boolean'] = True my_model.model_parameters['my_string'] = 'This string will be embedded into the .tflite' my_model.model_parameters['my_bytes'] = b'This byte string will be embedded also' my_model.model_parameters['my_float_list'] = [4.5, 2., 3.14] ########################################################################################## # (Optional) The following allows for running this model training script directly, e.g.: # python basic_example.py # # Note that this has the similar functionality to: # mltk train basic_example # if __name__ == '__main__': from mltk import cli # Setup the CLI logger cli.get_logger(verbose=False) # If this is true then this will do a "dry run" of the model testing # If this is false, then the model will be fully trained test_mode_enabled = True # Train the model # This does the same as issuing the command: mltk train basic_example-test --clean train_results = mltk_core.train_model(my_model, clean=True, test=test_mode_enabled) print(train_results) # Evaluate the model against the quantized .h5 (i.e. float32) model # This does the same as issuing the command: mltk evaluate basic_example-test tflite_eval_results = mltk_core.evaluate_model(my_model, verbose=True, test=test_mode_enabled) print(tflite_eval_results) # Profile the model in the simulator # This does the same as issuing the command: mltk profile basic_example-test profiling_results = mltk_core.profile_model(my_model, test=test_mode_enabled) print(profiling_results)