

Human Emotion Detection with TensorFlow

GitHub Repository

Building a Basic Model

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay
)
import seaborn as sns

import tensorflow as tf
# import tensorflow_datasets as tfds
# import tensorflow_probability as tfp
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import (
    Callback,
    CSVLogger,
    EarlyStopping,
    LearningRateScheduler,
    ModelCheckpoint
)
from tensorflow.keras.layers import (
    Layer,
    GlobalAveragePooling2D,
    Conv2D,
    MaxPool2D,
    Dense,
    Flatten,
    InputLayer,
    BatchNormalization,
    Input,
    Dropout,
    RandomBrightness,
    RandomContrast,
    RandomFlip,
    RandomRotation,
    Resizing,
    Rescaling
)
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy, SparseCategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy, SparseCategoricalAccuracy, TopKCategoricalAccuracy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import L2, L1
from tensorflow.keras.utils import image_dataset_from_directory
BATCH = 32
SIZE = 256
SEED = 444

EPOCHS = 30
LR = 0.001
FILTERS = 6
KERNEL = 3
STRIDES = 1
REGRATE = 0.0
POOL = 2
DORATE = 0.05
NLABELS = 3
DENSE1 = 100
DENSE2 = 10

Dataset

train_directory = './dataset/Emotions Dataset/Emotions Dataset/train'
test_directory = './dataset/Emotions Dataset/Emotions Dataset/test'
LABELS = ['angry', 'happy', 'sad']
train_dataset = image_dataset_from_directory(
train_directory,
labels='inferred',
label_mode='categorical',
class_names=LABELS,
color_mode='rgb',
batch_size=BATCH,
image_size=(SIZE, SIZE),
shuffle=True,
seed=SEED,
# validation_split=0.2,
# subset='validation',
interpolation='bilinear',
follow_links=False,
crop_to_aspect_ratio=False
)

# Found 6799 images belonging to 3 classes.
test_dataset = image_dataset_from_directory(
test_directory,
labels='inferred',
label_mode='categorical',
class_names=LABELS,
color_mode='rgb',
batch_size=BATCH,
image_size=(SIZE, SIZE),
shuffle=True,
seed=SEED
)

# Found 2278 images belonging to 3 classes.
plt.figure(figsize=(16,16))

for images, labels in train_dataset.take(1):
    for i in range(16):
        ax = plt.subplot(4, 4, i+1)
        plt.title(LABELS[tf.argmax(labels[i], axis=0).numpy()])
        plt.imshow(images[i]/255.)
        plt.axis('off')

plt.savefig('assets/tf_Emotion_Detection_01.webp', bbox_inches='tight')

tf Emotion Detection

training_dataset = (
train_dataset.prefetch(
tf.data.AUTOTUNE
)
)

testing_dataset = (
test_dataset.prefetch(
tf.data.AUTOTUNE
)
)

LeNet 5 Model

resize_rescale_layers = Sequential([
Resizing(SIZE, SIZE),
Rescaling(1./255)
])
model_lenet = Sequential([
InputLayer(input_shape=(None, None, 3)),
resize_rescale_layers,
Conv2D(
filters = FILTERS,
kernel_size = KERNEL,
strides = STRIDES,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
MaxPool2D(
pool_size = POOL,
strides = STRIDES*2
),
Dropout(rate=DORATE),
Conv2D(
filters = FILTERS*2+4,
kernel_size = KERNEL,
strides = STRIDES,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
MaxPool2D(
pool_size = POOL,
strides = STRIDES*2
),
Flatten(),
Dense(
DENSE1,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
Dropout(rate=DORATE),
Dense(
DENSE2,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
Dense(
NLABELS,
activation = 'softmax',
name = 'Output'
)
])

model_lenet.build(input_shape=(None, SIZE, SIZE, 3))
model_lenet.summary()

# Total params: 4,668,319
# Trainable params: 4,668,055
# Non-trainable params: 264
loss_function = CategoricalCrossentropy(
from_logits = False,
label_smoothing = 0.0,
axis = -1,
name = 'categorical_crossentropy'
)
metrics = [
CategoricalAccuracy(name='accuracy'),
TopKCategoricalAccuracy(k=2,name='topk-accuracy')
]
model_lenet.compile(
optimizer = Adam(learning_rate = LR),
loss = loss_function,
metrics = metrics
)

Model Training

history_lenet = model_lenet.fit(
training_dataset,
validation_data = testing_dataset,
epochs = EPOCHS,
verbose = 1
)

# loss: 0.0454
# accuracy: 0.9803
# topk-accuracy: 0.9982
# val_loss: 1.1798
# val_accuracy: 0.7441
# val_topk-accuracy: 0.8955
plt.plot(history_lenet.history['loss'])
plt.plot(history_lenet.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train_loss', 'val_loss'])
plt.show()

tf Emotion Detection

plt.plot(history_lenet.history['accuracy'])
plt.plot(history_lenet.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train_accuracy', 'val_accuracy'])
plt.show()

tf Emotion Detection

Model Evaluation

model_lenet.evaluate(testing_dataset)
# loss: 1.1798 - accuracy: 0.7441 - topk-accuracy: 0.8955
test_image = cv.imread('./dataset/happy.jpg')

# note: cv.imread returns BGR while the training pipeline loaded RGB images
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[0.1078622 0.8603977 0.03174012]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# happy
test_image = cv.imread('./dataset/sad.jpg')

img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[9.1999680e-02 2.9016874e-04 9.0771013e-01]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# sad
test_image = cv.imread('./dataset/angry.jpg')

img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[9.9998641e-01 1.3307266e-06 1.2283378e-05]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# angry
plt.figure(figsize=(16,16))

for images, labels in test_dataset.take(1):
    for i in range(16):
        ax = plt.subplot(4, 4, i+1)
        true = "True: " + LABELS[tf.argmax(labels[i], axis=0).numpy()]
        pred = "Predicted: " + LABELS[
            tf.argmax(model_lenet(tf.expand_dims(images[i], axis=0)).numpy(), axis=1).numpy()[0]
        ]
        plt.title(true + "\n" + pred)
        plt.imshow(images[i]/255.)
        plt.axis('off')

plt.savefig('assets/tf_Emotion_Detection_04.webp', bbox_inches='tight')

tf Emotion Detection

y_pred = []
y_test = []

for img, label in testing_dataset:
    y_pred.append(model_lenet(img))
    y_test.append(label.numpy())
conf_mtx = ConfusionMatrixDisplay(
confusion_matrix=confusion_matrix(
np.argmax(y_test[:-1], axis=-1).flatten(),
np.argmax(y_pred[:-1], axis=-1).flatten()
),
display_labels=LABELS
)

fig, ax = plt.subplots(figsize=(16,12))
conf_mtx.plot(ax=ax, cmap='plasma', include_values=False)

plt.savefig('assets/tf_Emotion_Detection_05.webp', bbox_inches='tight')

tf Emotion Detection

Adding Data Augmentation to Prevent Overfitting

resize_rescale_layers = Sequential([
Resizing(SIZE, SIZE),
Rescaling(1./255)
])
data_augmentation = Sequential([
    RandomRotation(factor=0.25),
    RandomFlip(mode='horizontal'),
    RandomContrast(factor=0.1),
    # RandomBrightness(0.1)
])
training_dataset = (
train_dataset
.map(lambda image, label: (data_augmentation(image), label))
.prefetch(tf.data.AUTOTUNE)
)


testing_dataset = (
test_dataset.prefetch(
tf.data.AUTOTUNE
)
)
model_lenet = Sequential([
InputLayer(input_shape=(None, None, 3)),
resize_rescale_layers,
Conv2D(
filters = FILTERS,
kernel_size = KERNEL,
strides = STRIDES,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
MaxPool2D(
pool_size = POOL,
strides = STRIDES*2
),
Dropout(rate=DORATE),
Conv2D(
filters = FILTERS*2+4,
kernel_size = KERNEL,
strides = STRIDES,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
MaxPool2D(
pool_size = POOL,
strides = STRIDES*2
),
Flatten(),
Dense(
DENSE1,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
Dropout(rate=DORATE),
Dense(
DENSE2,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
Dense(
NLABELS,
activation = 'softmax',
name = 'Output'
)
])

model_lenet.build(input_shape=(None, SIZE, SIZE, 3))
model_lenet.summary()

# Total params: 4,668,319
# Trainable params: 4,668,055
# Non-trainable params: 264
loss_function = CategoricalCrossentropy(
from_logits = False,
label_smoothing = 0.0,
axis = -1,
name = 'categorical_crossentropy'
)
metrics = [
CategoricalAccuracy(name='accuracy'),
TopKCategoricalAccuracy(k=2,name='topk-accuracy')
]
model_lenet.compile(
optimizer = Adam(learning_rate = LR),
loss = loss_function,
metrics = metrics
)

Model Training

history_lenet = model_lenet.fit(
training_dataset,
validation_data = testing_dataset,
epochs = EPOCHS,
verbose = 1
)

# loss: 0.5282 - accuracy: 0.7820
# topk-accuracy: 0.9394
# val_loss: 0.5216
# val_accuracy: 0.7871
# val_topk-accuracy: 0.9407
plt.plot(history_lenet.history['loss'])
plt.plot(history_lenet.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train_loss', 'val_loss'])
plt.show()

tf Emotion Detection

plt.plot(history_lenet.history['accuracy'])
plt.plot(history_lenet.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train_accuracy', 'val_accuracy'])
plt.show()

tf Emotion Detection

Model Evaluation

model_lenet.evaluate(testing_dataset)
# loss: 0.5216 - accuracy: 0.7871 - topk-accuracy: 0.9407
test_image = cv.imread('./dataset/happy.jpg')

img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[1.0409105e-03 3.4608482e-04 9.9861300e-01]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# sad
test_image = cv.imread('./dataset/sad.jpg')

img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[0.00754709 0.09368072 0.89877224]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# sad
test_image = cv.imread('./dataset/angry.jpg')

img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[0.17887418 0.5655646 0.25556126]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# happy
plt.figure(figsize=(16,16))

for images, labels in test_dataset.take(1):
    for i in range(16):
        ax = plt.subplot(4, 4, i+1)
        true = "True: " + LABELS[tf.argmax(labels[i], axis=0).numpy()]
        pred = "Predicted: " + LABELS[
            tf.argmax(model_lenet(tf.expand_dims(images[i], axis=0)).numpy(), axis=1).numpy()[0]
        ]
        plt.title(true + "\n" + pred)
        plt.imshow(images[i]/255.)
        plt.axis('off')

plt.savefig('assets/tf_Emotion_Detection_08.webp', bbox_inches='tight')

tf Emotion Detection

y_pred = []
y_test = []

for img, label in testing_dataset:
    y_pred.append(model_lenet(img))
    y_test.append(label.numpy())
conf_mtx = ConfusionMatrixDisplay(
confusion_matrix=confusion_matrix(
np.argmax(y_test[:-1], axis=-1).flatten(),
np.argmax(y_pred[:-1], axis=-1).flatten()
),
display_labels=LABELS
)

fig, ax = plt.subplots(figsize=(16,12))
conf_mtx.plot(ax=ax, cmap='plasma', include_values=False)

plt.savefig('assets/tf_Emotion_Detection_09.webp', bbox_inches='tight')

tf Emotion Detection

CutMix Data Augmentation

def sample_beta_distribution(size, concentration_0=0.2, concentration_1=0.2):
    # a Beta(a, b) sample is Gamma(a) / (Gamma(a) + Gamma(b))
    gamma_1_sample = tf.random.gamma(shape=[size], alpha=concentration_1)
    gamma_2_sample = tf.random.gamma(shape=[size], alpha=concentration_0)
    return gamma_1_sample / (gamma_1_sample + gamma_2_sample)
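
This uses the Gamma-ratio identity: if X ~ Gamma(a) and Y ~ Gamma(b) are independent, then X / (X + Y) ~ Beta(a, b), whose mean is a / (a + b). A quick sanity check (a sketch, not part of the original notebook):

# with a = b = 0.2 the mean is 0.5 and samples cluster near 0 and 1,
# so most CutMix patches end up either very small or very large
lam = sample_beta_distribution(10000)
print(tf.reduce_mean(lam).numpy())  # ≈ 0.5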
data_augmentation = Sequential([
    RandomRotation(factor=0.25),
    RandomFlip(mode='horizontal'),
    RandomContrast(factor=0.1),
    RandomBrightness(0.1)
])
# build two independently augmented views of the training set for CutMix
train_ds_one = train_dataset.map(lambda image, label: (data_augmentation(image), label))
train_ds_two = train_dataset.map(lambda image, label: (data_augmentation(image), label))

train_ds = tf.data.Dataset.zip((train_ds_one, train_ds_two))
@tf.function
def get_box(lambda_value):
    cut_rat = tf.math.sqrt(1.0 - lambda_value)

    cut_w = SIZE * cut_rat  # rw
    cut_w = tf.cast(cut_w, tf.int32)

    cut_h = SIZE * cut_rat  # rh
    cut_h = tf.cast(cut_h, tf.int32)

    cut_x = tf.random.uniform((1,), minval=0, maxval=SIZE, dtype=tf.int32)  # rx
    cut_y = tf.random.uniform((1,), minval=0, maxval=SIZE, dtype=tf.int32)  # ry

    boundaryx1 = tf.clip_by_value(cut_x[0] - cut_w // 2, 0, SIZE)
    boundaryy1 = tf.clip_by_value(cut_y[0] - cut_h // 2, 0, SIZE)
    bbx2 = tf.clip_by_value(cut_x[0] + cut_w // 2, 0, SIZE)
    bby2 = tf.clip_by_value(cut_y[0] + cut_h // 2, 0, SIZE)

    target_h = bby2 - boundaryy1
    if target_h == 0:
        target_h += 1

    target_w = bbx2 - boundaryx1
    if target_w == 0:
        target_w += 1

    return boundaryx1, boundaryy1, target_h, target_w
@tf.function
def cutmix(train_ds_one, train_ds_two):
    (image1, label1), (image2, label2) = train_ds_one, train_ds_two

    alpha = [0.25]
    beta = [0.25]

    # Get a sample from the Beta distribution
    lambda_value = sample_beta_distribution(1, alpha, beta)

    # Define Lambda
    lambda_value = lambda_value[0][0]

    # Get the bounding box offsets, heights and widths
    boundaryx1, boundaryy1, target_h, target_w = get_box(lambda_value)

    # Get a patch from the second image (`image2`)
    crop2 = tf.image.crop_to_bounding_box(
        image2, boundaryy1, boundaryx1, target_h, target_w
    )
    # Pad the `image2` patch (`crop2`) with the same offset
    image2 = tf.image.pad_to_bounding_box(
        crop2, boundaryy1, boundaryx1, SIZE, SIZE
    )
    # Get a patch from the first image (`image1`)
    crop1 = tf.image.crop_to_bounding_box(
        image1, boundaryy1, boundaryx1, target_h, target_w
    )
    # Pad the `image1` patch (`crop1`) with the same offset
    img1 = tf.image.pad_to_bounding_box(
        crop1, boundaryy1, boundaryx1, SIZE, SIZE
    )

    # Blank out the patch region in `image1` ...
    image1 = image1 - img1
    # ... then paste in the `image2` patch to get the CutMix image
    image = image1 + image2

    # Adjust lambda to match the actual pixel ratio of the patch
    lambda_value = 1 - (target_w * target_h) / (SIZE * SIZE)
    lambda_value = tf.cast(lambda_value, tf.float32)

    # Combine the labels of both images proportionally
    label = lambda_value * label1 + (1 - lambda_value) * label2
    return image, label
# Create the new dataset using our `cutmix` utility
train_ds_cmu = (
train_ds.shuffle(1024)
.map(cutmix, num_parallel_calls=tf.data.AUTOTUNE)
.prefetch(tf.data.AUTOTUNE)
)

test_ds = (
test_dataset.prefetch(tf.data.AUTOTUNE)
)

# Let's preview 9 samples from the dataset
image_batch, label_batch = next(iter(train_ds_cmu))
plt.figure(figsize=(10, 10))
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.title(LABELS[np.argmax(label_batch[i])])
    plt.imshow(image_batch[i]/255)
    plt.axis("off")

tf Emotion Detection

resize_rescale_layers = Sequential([
Resizing(SIZE, SIZE),
Rescaling(1./255)
])

Model Building

model_lenet = Sequential([
InputLayer(input_shape=(None, None, 3)),
resize_rescale_layers,
Conv2D(
filters = FILTERS,
kernel_size = KERNEL,
strides = STRIDES,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
MaxPool2D(
pool_size = POOL,
strides = STRIDES*2
),
Dropout(rate=DORATE),
Conv2D(
filters = FILTERS*2+4,
kernel_size = KERNEL,
strides = STRIDES,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
MaxPool2D(
pool_size = POOL,
strides = STRIDES*2
),
Flatten(),
Dense(
DENSE1,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
Dropout(rate=DORATE),
Dense(
DENSE2,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
Dense(
NLABELS,
activation = 'softmax',
name = 'Output'
)
])

model_lenet.build()
model_lenet.summary()

# Total params: 6,153,119
# Trainable params: 6,152,855
# Non-trainable params: 264
loss_function = CategoricalCrossentropy(
from_logits = False,
label_smoothing = 0.0,
axis = -1,
name = 'categorical_crossentropy'
)
metrics = [
CategoricalAccuracy(name='accuracy'),
TopKCategoricalAccuracy(k=2,name='topk-accuracy')
]
model_lenet.compile(
optimizer = Adam(learning_rate = LR),
loss = loss_function,
metrics = metrics
)

Model Training

history_lenet = model_lenet.fit(
train_ds_cmu,
validation_data = test_ds,
epochs = EPOCHS,
verbose = 1
)

# loss: 0.8846
# accuracy: 0.6149
# topk-accuracy: 0.8550
# val_loss: 0.6832
# val_accuracy: 0.7226
# val_topk-accuracy: 0.9083
plt.plot(history_lenet.history['loss'])
plt.plot(history_lenet.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train_loss', 'val_loss'])
plt.show()

tf Emotion Detection

plt.plot(history_lenet.history['accuracy'])
plt.plot(history_lenet.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train_accuracy', 'val_accuracy'])
plt.show()

tf Emotion Detection

Model Evaluation

model_lenet.evaluate(test_ds)
# loss: 0.6832 - accuracy: 0.7226 - topk-accuracy: 0.9083
test_image = cv.imread('./dataset/happy.jpg')

img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[0.1541878 0.37400016 0.47181207]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# sad
test_image = cv.imread('./dataset/sad.jpg')

img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[0.20370252 0.60639006 0.1899074 ]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# happy
test_image = cv.imread('./dataset/angry.jpg')

img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = model_lenet(img).numpy()
print(label)
# [[0.07486102 0.8021671 0.1229719 ]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# happy
plt.figure(figsize=(16,16))

for images, labels in test_dataset.take(1):
    for i in range(16):
        ax = plt.subplot(4, 4, i+1)
        true = "True: " + LABELS[tf.argmax(labels[i], axis=0).numpy()]
        pred = "Predicted: " + LABELS[
            tf.argmax(model_lenet(tf.expand_dims(images[i], axis=0)).numpy(), axis=1).numpy()[0]
        ]
        plt.title(true + "\n" + pred)
        plt.imshow(images[i]/255.)
        plt.axis('off')

plt.savefig('assets/tf_Emotion_Detection_13.webp', bbox_inches='tight')

tf Emotion Detection

y_pred = []
y_test = []

for img, label in test_ds:
    y_pred.append(model_lenet(img))
    y_test.append(label.numpy())
conf_mtx = ConfusionMatrixDisplay(
confusion_matrix=confusion_matrix(
np.argmax(y_test[:-1], axis=-1).flatten(),
np.argmax(y_pred[:-1], axis=-1).flatten()
),
display_labels=LABELS
)

fig, ax = plt.subplots(figsize=(16,12))
conf_mtx.plot(ax=ax, cmap='plasma', include_values=False)

plt.savefig('assets/tf_Emotion_Detection_14.webp', bbox_inches='tight')

tf Emotion Detection

Saving the Model

# Save the weights
model_lenet.save_weights('./saved_weights/cutmix_weights')

# # Create a new model instance
# model_lenet = create_model()

# # Restore the weights
# model_lenet.load_weights('./checkpoints/my_checkpoint')
model_lenet.save('saved_model/cutmix_model')

restored_model = tf.keras.models.load_model('saved_model/cutmix_model')

# Check its architecture
restored_model.summary()
restored_model.evaluate(test_ds)
# loss: 0.6832 - accuracy: 0.7226 - topk-accuracy: 0.9083

Saving the Augmented Dataset

The TFRecord format is a simple format for storing a sequence of binary records; each record is serialized with protocol buffers, a cross-platform, cross-language library for efficient serialization of structured data.
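
As a minimal illustration of the format (a sketch, assuming TF 2.x; not part of the original notebook), a tf.train.Example round-trips like this:

# build, serialize and parse back a single tf.train.Example
example = tf.train.Example(features=tf.train.Features(feature={
    'labels': tf.train.Feature(int64_list=tf.train.Int64List(value=[1]))
}))
serialized = example.SerializeToString()
parsed = tf.io.parse_single_example(
    serialized, {'labels': tf.io.FixedLenFeature([], tf.int64)}
)
print(parsed['labels'].numpy())  # 1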

training_unbatched = (
train_ds_cmu.unbatch()
)

testing_unbatched = (
test_ds.unbatch()
)
def create_example(image, label):
    # wrap the encoded image bytes and the integer label as tf.train features
    bytes_feature = tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[image]))

    int_feature = tf.train.Feature(
        int64_list=tf.train.Int64List(value=[label]))

    example = tf.train.Example(
        features=tf.train.Features(feature={
            'images': bytes_feature,
            'labels': int_feature,
        }))

    return example.SerializeToString()
def encode_image(image, label):
    image = tf.image.convert_image_dtype(image, dtype=tf.uint8)
    image = tf.io.encode_jpeg(image)
    return image, tf.argmax(label)

encoded_ds = (
    training_unbatched.map(encode_image)
)
# SHARDS and PATH were defined in the original notebook; example values shown here
SHARDS = 10
PATH = './tfrecords/shard_{:02d}.tfrecord'

for shard_number in range(SHARDS):

    sharded_ds = (
        encoded_ds
        .shard(SHARDS, shard_number)
        .as_numpy_iterator()
    )

    with tf.io.TFRecordWriter(PATH.format(shard_number)) as file_writer:
        for image, label in sharded_ds:
            file_writer.write(create_example(image, label))

Reconstructing the Augmented Dataset (This part does not work yet!)

shards_list = [PATH.format(i) for i in range(SHARDS-2)]
loaded_training_ds = tf.data.TFRecordDataset(filenames = shards_list)
def parse_tfrecords(example):
    feature_description = {
        'images': tf.io.FixedLenFeature([], tf.string),
        'labels': tf.io.FixedLenFeature([], tf.int64)
    }

    example = tf.io.parse_single_example(example, feature_description)
    example['images'] = tf.image.convert_image_dtype(
        tf.io.decode_image(
            example['images'], channels=3
        ), dtype=tf.float32
    )

    return example['images'], example['labels']
parsed_ds = (
loaded_training_ds
.map(parse_tfrecords)
.batch(BATCH)
.prefetch(tf.data.AUTOTUNE)
)
for i in train_ds_cmu.take(1):
    print(i)

# tf.Tensor: shape=(32, 256, 256, 3)
for i in parsed_ds.take(1):
    print(i)
# tf.Tensor: shape=(32, 256, 256, 3)
model_lenet2 = Sequential([
InputLayer(input_shape=(None, None, 3)),
resize_rescale_layers,
Conv2D(
filters = FILTERS,
kernel_size = KERNEL,
strides = STRIDES,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
MaxPool2D(
pool_size = POOL,
strides = STRIDES*2
),
Dropout(rate=DORATE),
Conv2D(
filters = FILTERS*2+4,
kernel_size = KERNEL,
strides = STRIDES,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
MaxPool2D(
pool_size = POOL,
strides = STRIDES*2
),
Flatten(),
Dense(
DENSE1,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
Dropout(rate=DORATE),
Dense(
DENSE2,
activation = 'relu',
kernel_regularizer = L2(REGRATE)
),
BatchNormalization(),
Dense(
NLABELS,
activation = 'softmax',
name = 'Output'
)
])

model_lenet2.build()
model_lenet2.summary()

# Total params: 4,668,319
# Trainable params: 4,668,055
# Non-trainable params: 264
loss_function2 = SparseCategoricalCrossentropy()

metrics2 = [SparseCategoricalAccuracy(name="accuracy")]

model_lenet2.compile(
optimizer = Adam(learning_rate = LR),
loss = loss_function2,
metrics = metrics2
)
history_lenet2 = model_lenet2.fit(
parsed_ds,
epochs = EPOCHS,
verbose = 1
)

Transfer Learning

Building the Efficient TF Model

# # transfer learning
# backbone = tf.keras.applications.efficientnet.EfficientNetB4(
#     include_top=False,
#     weights='imagenet',
#     input_shape=(SIZE, SIZE, 3)
# )

# ERROR - EfficientNetv1 cannot be saved:
# ValueError: Unable to create a Keras model from SavedModel at saved_model/efficient_model. This SavedModel was exported with `tf.saved_model.save`, and lacks the Keras metadata file. Please save your Keras model by calling `model.save` or `tf.keras.models.save_model`. Note that you can still load this SavedModel with `tf.saved_model.load`.
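
As the error message itself suggests, the exported files can still be loaded through the low-level SavedModel API (a sketch, not used further in this notebook):

# low-level load: returns a generic SavedModel object, not a Keras model
loaded = tf.saved_model.load('saved_model/efficient_model')
print(list(loaded.signatures))  # typically includes 'serving_default'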

Since I was unable to save the above model, I checked the Keras documentation for an alternative:


Model            Size (MB)  Top-1 Accuracy  Top-5 Accuracy  Parameters  Depth  Time (ms)/step CPU  Time (ms)/step GPU
MobileNetV2      14         71.3%           90.1%           3.5M        105    25.9                3.8
EfficientNetB4   75         82.9%           96.4%           19.5M       258    308.3               15.1
EfficientNetV2S  88         83.9%           96.7%           21.6M       -      -                   -

  • Validation scores:
    • EfficientNetB4: loss: 0.5003 - accuracy: 0.7954 - topk_accuracy: 0.9451 (~80 MB)
    • EfficientNetV2S: loss: 0.5144 - accuracy: 0.7968 - topk_accuracy: 0.9359 (104.6 MB)
    • MobileNetV3Small: loss: 0.5638 - accuracy: 0.7682 - topk_accuracy: 0.9328 (12.8 MB)
# transfer learning
backbone = tf.keras.applications.efficientnet_v2.EfficientNetV2S(
include_top=False,
weights='imagenet',
input_shape=(SIZE, SIZE, 3),
include_preprocessing=True
)
backbone.trainable = False
efficient_model = tf.keras.Sequential([
Input(shape=(SIZE, SIZE, 3)),
backbone,
GlobalAveragePooling2D(),
Dense(DENSE1, activation='relu'),
BatchNormalization(),
Dense(DENSE2, activation='relu'),
Dense(NLABELS, activation='softmax')
])

efficient_model.summary()
checkpoint_callback = ModelCheckpoint(
'best_weights',
monitor='val_accuracy',
mode='max',
verbose=1,
save_best_only=True
)
loss_function = CategoricalCrossentropy()
metrics = [CategoricalAccuracy(name='accuracy'), TopKCategoricalAccuracy(k=2, name='topk_accuracy')]
efficient_model.compile(
optimizer = Adam(learning_rate=LR),
loss = loss_function,
metrics = metrics
)

Model Training

efficient_history = efficient_model.fit(
training_dataset,
validation_data = testing_dataset,
epochs = EPOCHS,
verbose = 1
)

Model Evaluation

efficient_model.evaluate(testing_dataset)
# loss: 0.5144 - accuracy: 0.7968 - topk_accuracy: 0.9359
plt.plot(efficient_history.history['loss'])
plt.plot(efficient_history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train_loss', 'val_loss'])
plt.show()

tf Emotion Detection

plt.plot(efficient_history.history['accuracy'])
plt.plot(efficient_history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train_accuracy', 'val_accuracy'])
plt.show()

tf Emotion Detection

test_image = cv.imread('./dataset/happy.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = efficient_model(img).numpy()
print(label)
# [[7.2473467e-06 4.2083409e-02 9.5790941e-01]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# sad
test_image = cv.imread('./dataset/sad.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = efficient_model(img).numpy()
print(label)
# [[0.06096885 0.03196466 0.90706646]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# sad
test_image = cv.imread('./dataset/angry.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = efficient_model(img).numpy()
print(label)
# [[0.87592113 0.08582695 0.03825196]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# angry
plt.figure(figsize=(16,16))

for images, labels in testing_dataset.take(1):
    for i in range(16):
        ax = plt.subplot(4, 4, i+1)
        true = "True: " + LABELS[tf.argmax(labels[i], axis=0).numpy()]
        pred = "Predicted: " + LABELS[
            tf.argmax(efficient_model(tf.expand_dims(images[i], axis=0)).numpy(), axis=1).numpy()[0]
        ]
        plt.title(true + "\n" + pred)
        plt.imshow(images[i]/255.)
        plt.axis('off')

plt.savefig('assets/tf_Emotion_Detection_17.webp', bbox_inches='tight')

tf Emotion Detection

y_pred = []
y_test = []

for img, label in testing_dataset:
    y_pred.append(efficient_model(img))
    y_test.append(label.numpy())
conf_mtx = ConfusionMatrixDisplay(
confusion_matrix=confusion_matrix(
np.argmax(y_test[:-1], axis=-1).flatten(),
np.argmax(y_pred[:-1], axis=-1).flatten()
),
display_labels=LABELS
)

fig, ax = plt.subplots(figsize=(16,12))
conf_mtx.plot(ax=ax, cmap='plasma', include_values=False)

plt.savefig('assets/tf_Emotion_Detection_18.webp', bbox_inches='tight')

tf Emotion Detection

Saving the Model

tf.keras.saving.save_model(
efficient_model, 'saved_model/efficient_model', overwrite=True, save_format='tf'
)
# restore the model
restored_model = tf.keras.saving.load_model('saved_model/efficient_model')
# Check its architecture
restored_model.summary()
restored_model.evaluate(testing_dataset)
# loss: 0.5144 - accuracy: 0.7968 - topk_accuracy: 0.9359

Building the MobileNet TF Model

# transfer learning
backbone2 = tf.keras.applications.MobileNetV3Small(
input_shape=(SIZE, SIZE, 3),
alpha=1.0,
minimalistic=True,
include_top=False,
weights='imagenet',
dropout_rate=0.2,
include_preprocessing=True
)
backbone2.trainable = False
mobilenet_model = tf.keras.Sequential([
Input(shape=(SIZE, SIZE, 3)),
backbone2,
GlobalAveragePooling2D(),
Dense(DENSE1, activation='relu'),
BatchNormalization(),
Dense(DENSE2, activation='relu'),
Dense(NLABELS, activation='softmax')
])

mobilenet_model.summary()
checkpoint_callback = ModelCheckpoint(
'best_weights',
monitor='val_accuracy',
mode='max',
verbose=1,
save_best_only=True
)
loss_function = CategoricalCrossentropy()
metrics = [CategoricalAccuracy(name='accuracy'), TopKCategoricalAccuracy(k=2, name='topk_accuracy')]
mobilenet_model.compile(
optimizer = Adam(learning_rate=LR),
loss = loss_function,
metrics = metrics
)

Model Training

# execute tf.functions eagerly instead of compiling them into graphs (easier debugging, slower training)
tf.config.run_functions_eagerly(True)
mobilenet_history = mobilenet_model.fit(
training_dataset,
validation_data = testing_dataset,
epochs = EPOCHS,
verbose = 1
)

# loss: 0.4242
# accuracy: 0.8263
# topk_accuracy: 0.9597
# val_loss: 0.5638
# val_accuracy: 0.7682
# val_topk_accuracy: 0.9328

Model Evaluation

mobilenet_model.evaluate(testing_dataset)
# loss: 0.5638 - accuracy: 0.7682 - topk_accuracy: 0.9328
plt.plot(mobilenet_history.history['loss'])
plt.plot(mobilenet_history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train_loss', 'val_loss'])
plt.show()

tf Emotion Detection

plt.plot(mobilenet_history.history['accuracy'])
plt.plot(mobilenet_history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train_accuracy', 'val_accuracy'])
plt.show()

tf Emotion Detection

test_image = cv.imread('./dataset/happy.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = mobilenet_model(img).numpy()
print(label)
# [[2.512952e-04 9.718944e-01 2.785430e-02]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# happy
test_image = cv.imread('./dataset/sad.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = mobilenet_model(img).numpy()
print(label)
# [[4.4993469e-03 9.9456584e-01 9.3488337e-04]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# happy
test_image = cv.imread('./dataset/angry.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = mobilenet_model(img).numpy()
print(label)
# [[9.8563331e-01 1.4297843e-02 6.8810325e-05]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# angry
plt.figure(figsize=(16,16))

for images, labels in testing_dataset.take(1):
    for i in range(16):
        ax = plt.subplot(4, 4, i+1)
        true = "True: " + LABELS[tf.argmax(labels[i], axis=0).numpy()]
        pred = "Predicted: " + LABELS[
            tf.argmax(mobilenet_model(tf.expand_dims(images[i], axis=0)).numpy(), axis=1).numpy()[0]
        ]
        plt.title(true + "\n" + pred)
        plt.imshow(images[i]/255.)
        plt.axis('off')

plt.savefig('assets/tf_Emotion_Detection_21.webp', bbox_inches='tight')

tf Emotion Detection

y_pred = []
y_test = []

for img, label in testing_dataset:
    y_pred.append(mobilenet_model(img))
    y_test.append(label.numpy())
conf_mtx = ConfusionMatrixDisplay(
confusion_matrix=confusion_matrix(
np.argmax(y_test[:-1], axis=-1).flatten(),
np.argmax(y_pred[:-1], axis=-1).flatten()
),
display_labels=LABELS
)

fig, ax = plt.subplots(figsize=(16,12))
conf_mtx.plot(ax=ax, cmap='plasma', include_values=False)

plt.savefig('assets/tf_Emotion_Detection_22.webp', bbox_inches='tight')

tf Emotion Detection

Saving the Model

tf.keras.saving.save_model(
mobilenet_model, 'saved_model/mobilenet_model', overwrite=True, save_format='tf'
)
# restore the model
restored_model2 = tf.keras.saving.load_model('saved_model/mobilenet_model')
# Check its architecture
restored_model2.summary()
restored_model2.evaluate(testing_dataset)
# loss: 0.5638 - accuracy: 0.7682 - topk_accuracy: 0.9328

TFLite Conversion

# Convert the model into TF Lite.
converter = tf.lite.TFLiteConverter.from_saved_model('saved_model/mobilenet_model')
tflite_model = converter.convert()
# Save the model.
with open('saved_model/mobilenet_model.tflite', 'wb') as f:
    f.write(tflite_model)
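
To sanity-check the converted model, it can be run through the TFLite interpreter (a sketch, not part of the original notebook):

# run one dummy inference through the TFLite model
interpreter = tf.lite.Interpreter(model_path='saved_model/mobilenet_model.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# dummy input with the expected shape and dtype
dummy = np.zeros(input_details[0]['shape'], dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], dummy)
interpreter.invoke()
print(interpreter.get_tensor(output_details[0]['index']))  # softmax over the 3 labels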

Finetuning the MobileNet TF Model

# transfer learning
backbone = tf.keras.applications.MobileNetV3Small(
input_shape=(SIZE, SIZE, 3),
alpha=1.0,
minimalistic=True,
include_top=False,
weights='imagenet',
dropout_rate=0.2,
include_preprocessing=True
)
backbone.trainable = False
# mobilenet_model = tf.keras.Sequential([
#     Input(shape=(SIZE, SIZE, 3)),
#     backbone2,
#     GlobalAveragePooling2D(),
#     Dense(DENSE1, activation='relu'),
#     BatchNormalization(),
#     Dense(DENSE2, activation='relu'),
#     Dense(NLABELS, activation='softmax')
# ])

inputs = Input(shape=(SIZE, SIZE, 3))
# keep the frozen backbone in inference mode so its BatchNorm statistics stay fixed
x = backbone(inputs, training=False)

x = GlobalAveragePooling2D()(x)
x = Dense(DENSE1, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(DENSE2, activation='relu')(x)

output = Dense(NLABELS, activation='softmax')(x)

mobilenet_model = Model(inputs, output)
mobilenet_model.summary()
checkpoint_callback = ModelCheckpoint(
'best_weights',
monitor='val_accuracy',
mode='max',
verbose=1,
save_best_only=True
)
early_stopping_callback = EarlyStopping(
monitor='val_accuracy',
patience=10,
restore_best_weights=True
)
loss_function = CategoricalCrossentropy()
metrics = [CategoricalAccuracy(name='accuracy'), TopKCategoricalAccuracy(k=2, name='topk_accuracy')]
mobilenet_model.compile(
optimizer = Adam(learning_rate=LR),
loss = loss_function,
metrics = metrics
)

Model Training

tf.config.run_functions_eagerly(True)
mobilenet_history = mobilenet_model.fit(
training_dataset,
validation_data = testing_dataset,
epochs = EPOCHS,
verbose = 1,
callbacks=[checkpoint_callback, early_stopping_callback]
)

# loss: 0.4372
# accuracy: 0.8188
# topk_accuracy: 0.9547
# val_loss: 0.5934
# val_accuracy: 0.7643
# val_topk_accuracy: 0.9320

Model Evaluation

mobilenet_model.evaluate(testing_dataset)
# loss: 0.5934 - accuracy: 0.7643 - topk_accuracy: 0.9320

Model Finetuning

backbone.trainable = True
mobilenet_model.compile(
optimizer = Adam(learning_rate=LR/100),
loss = loss_function,
metrics = metrics
)
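
This unfreezes the entire backbone at a learning rate 100x lower than before. A common variant (a sketch, not what is done here) is to unfreeze only the top of the backbone and keep the early layers frozen:

# hypothetical: keep all but the last 30 backbone layers frozen
backbone.trainable = True
for layer in backbone.layers[:-30]:
    layer.trainable = False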
mobilenet_history = mobilenet_model.fit(
training_dataset,
validation_data = testing_dataset,
epochs = EPOCHS,
verbose = 1,
callbacks=[checkpoint_callback, early_stopping_callback]
)

# loss: 0.2703
# accuracy: 0.8912
# topk_accuracy: 0.9763
# val_loss: 0.3906
# val_accuracy: 0.8455
# val_topk_accuracy: 0.9627

Model Evaluation

mobilenet_model.evaluate(testing_dataset)
# loss: 0.3906 - accuracy: 0.8455 - topk_accuracy: 0.9627
plt.plot(mobilenet_history.history['loss'])
plt.plot(mobilenet_history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train_loss', 'val_loss'])
plt.show()

tf Emotion Detection

plt.plot(mobilenet_history.history['accuracy'])
plt.plot(mobilenet_history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train_accuracy', 'val_accuracy'])
plt.show()

tf Emotion Detection

test_image = cv.imread('./dataset/happy.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = mobilenet_model(img).numpy()
print(label)
# [[0.01388984 0.95678276 0.02932744]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# happy
test_image = cv.imread('./dataset/sad.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = mobilenet_model(img).numpy()
print(label)
# [[5.297706e-04 9.994018e-01 6.839229e-05]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# happy
test_image = cv.imread('./dataset/angry.jpg')
test_image = cv.resize(test_image, (SIZE, SIZE))
img = tf.constant(test_image, dtype=tf.float32)
img = tf.expand_dims(img, axis=0)

label = mobilenet_model(img).numpy()
print(label)
# [[9.6875304e-01 3.1238725e-02 8.2295473e-06]]
print(LABELS[tf.argmax(label, axis=1).numpy()[0]])
# angry
plt.figure(figsize=(16,16))

for images, labels in testing_dataset.take(1):
    for i in range(16):
        ax = plt.subplot(4, 4, i+1)
        true = "True: " + LABELS[tf.argmax(labels[i], axis=0).numpy()]
        pred = "Predicted: " + LABELS[
            tf.argmax(mobilenet_model(tf.expand_dims(images[i], axis=0)).numpy(), axis=1).numpy()[0]
        ]
        plt.title(true + "\n" + pred)
        plt.imshow(images[i]/255.)
        plt.axis('off')

plt.savefig('assets/tf_Emotion_Detection_25.webp', bbox_inches='tight')

tf Emotion Detection

y_pred = []
y_test = []

for img, label in testing_dataset:
    y_pred.append(mobilenet_model(img))
    y_test.append(label.numpy())
conf_mtx = ConfusionMatrixDisplay(
confusion_matrix=confusion_matrix(
np.argmax(y_test[:-1], axis=-1).flatten(),
np.argmax(y_pred[:-1], axis=-1).flatten()
),
display_labels=LABELS
)

fig, ax = plt.subplots(figsize=(16,12))
conf_mtx.plot(ax=ax, cmap='plasma', include_values=False)

plt.savefig('assets/tf_Emotion_Detection_26.webp', bbox_inches='tight')

tf Emotion Detection

Saving the Model

tf.keras.saving.save_model(
mobilenet_model, 'saved_model/mobilenet_model', overwrite=True, save_format='tf'
)
# restore the model
restored_model2 = tf.keras.saving.load_model('saved_model/mobilenet_model')
# Check its architecture
restored_model2.summary()
restored_model2.evaluate(testing_dataset)
# loss: 0.3906 - accuracy: 0.8455 - topk_accuracy: 0.9627

TFLite Conversion

# Convert the model into TF Lite.
converter = tf.lite.TFLiteConverter.from_saved_model('saved_model/mobilenet_model')
tflite_model = converter.convert()
# Save the model.
with open('saved_model/mobilenet_model.tflite', 'wb') as f:
    f.write(tflite_model)