Skip to main content

Mong Kok, Hong Kong

Github Repository

See also:

Tensorflow Neural Network Regression

Optimizing Model Performance

Get more data

# create a new feature matrix: integers from -1000 up to (excluding) 1000 in steps of 4
X = tf.range(-1000, 1000, 4)
# create labels for those features that follow a pattern (label = feature + 10)
y = X + 10
len(X)
# 500 => 80:20 split = 400:100
# derive the split point from the dataset size instead of hard-coding 400,
# so the 80:20 ratio still holds when the range above is changed
# (integer arithmetic avoids float rounding)
train_size = (len(X) * 80) // 100

# to get around biases it is better to shuffle your data
# first create random indices to be able to shuffle
# both features and labels in the same order
indices = tf.range(start=0, limit=tf.shape(X)[0], dtype=tf.int32)
shuffled_indices = tf.random.shuffle(indices)

# use random indices to shuffle both tensors
X_random = tf.gather(X, shuffled_indices)
y_random = tf.gather(y, shuffled_indices)

# re-split
X_train = X_random[:train_size]  # take first 80% of features
X_test = X_random[train_size:]  # take last 20% of features
y_train = y_random[:train_size]  # take first 80% of labels
y_test = y_random[train_size:]  # take last 20% of labels

# visualize datasets
plt.figure(figsize=(10, 7))
plt.scatter(X_train, y_train, color="blue", label="Training Data")
plt.scatter(X_test, y_test, color="hotpink", label="Testing Data")
plt.legend()

Tensorflow Neural Network Regression

Add Model Complexity

  • model_1 - longer training 200 epochs
  • model_2 - add another dense layer
  • model_3 - add another dropout layer
  • model_4 - same as model above but trained for 500 epochs
  • ... take the best model and reduce complexity to find an optimum

model_1

# longer training 200 epochs
# architecture: two hidden ReLU layers (128 and 64 units) with a 25% dropout in between
model_1 = tf.keras.Sequential([
    layers.Dense(1, input_shape=[1], name="input_layer"),
    layers.Dense(128, activation="relu", name="dense_layer1"),
    layers.Dropout(.25, name="dropout"),
    layers.Dense(64, activation="relu", name="dense_layer2"),
    layers.Dense(1, name="output_layer")
], name="my_model_1")

model_1.compile(
    loss=tf.keras.losses.mae,
    optimizer=optimizers.Adam(learning_rate=0.01),
    # Keras documents `metrics` as a list of metrics; a bare string
    # is not accepted by every Keras version
    metrics=["mae"])
# the test split doubles as validation data, so val_mae is reported per epoch
model_1.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=200)
# Epoch 200/200
# 13/13 [==============================] - 0s 5ms/step - loss: 10.6268 - mae: 10.6268 - val_loss: 122.6467 - val_mae: 122.6467

y_pred_1 = model_1.predict(X_test)

# blue dots should line up with red dots for a perfect prediction
plot_predictions(train_data=X_train,
                 train_labels=y_train,
                 test_data=X_test,
                 test_labels=y_test,
                 predictions=y_pred_1)

Tensorflow Neural Network Regression

model_2

# add another dense layer
# architecture: three hidden ReLU layers (128, 64, 32 units),
# with a 25% dropout after the first one
model_2 = tf.keras.Sequential([
    layers.Dense(1, input_shape=[1], name="input_layer"),
    layers.Dense(128, activation="relu", name="dense_layer1"),
    layers.Dropout(.25, name="dropout"),
    layers.Dense(64, activation="relu", name="dense_layer2"),
    layers.Dense(32, activation="relu", name="dense_layer3"),
    layers.Dense(1, name="output_layer")
], name="my_model_2")

model_2.compile(
    loss=tf.keras.losses.mae,
    optimizer=optimizers.Adam(learning_rate=0.01),
    # Keras documents `metrics` as a list of metrics; a bare string
    # is not accepted by every Keras version
    metrics=["mae"])
# the test split doubles as validation data, so val_mae is reported per epoch
model_2.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100)
# Epoch 100/100
# 13/13 [==============================] - 0s 5ms/step - loss: 31.7044 - mae: 31.7044 - val_loss: 10.8619 - val_mae: 10.8619

y_pred_2 = model_2.predict(X_test)

# blue dots should line up with red dots for a perfect prediction
plot_predictions(train_data=X_train,
                 train_labels=y_train,
                 test_data=X_test,
                 test_labels=y_test,
                 predictions=y_pred_2)

Tensorflow Neural Network Regression

model_3

# add another dropout layer
# architecture: three hidden ReLU layers (128, 64, 32 units),
# with a 25% dropout after each of the first two
model_3 = tf.keras.Sequential([
    layers.Dense(1, input_shape=[1], name="input_layer"),
    layers.Dense(128, activation="relu", name="dense_layer1"),
    layers.Dropout(.25, name="dropout1"),
    layers.Dense(64, activation="relu", name="dense_layer2"),
    layers.Dropout(.25, name="dropout2"),
    layers.Dense(32, activation="relu", name="dense_layer3"),
    layers.Dense(1, name="output_layer")
], name="my_model_3")

model_3.compile(
    loss=tf.keras.losses.mae,
    optimizer=optimizers.Adam(learning_rate=0.01),
    # Keras documents `metrics` as a list of metrics; a bare string
    # is not accepted by every Keras version
    metrics=["mae"])
# the test split doubles as validation data, so val_mae is reported per epoch
model_3.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100)
# Epoch 100/100
# 13/13 [==============================] - 0s 5ms/step - loss: 44.1139 - mae: 44.1139 - val_loss: 107.2299 - val_mae: 107.2299

y_pred_3 = model_3.predict(X_test)

# blue dots should line up with red dots for a perfect prediction
plot_predictions(train_data=X_train,
                 train_labels=y_train,
                 test_data=X_test,
                 test_labels=y_test,
                 predictions=y_pred_3)

Tensorflow Neural Network Regression

model_4

# same as model above but trained for 500 epochs
# architecture: three hidden ReLU layers (128, 64, 32 units),
# with a 25% dropout after each of the first two
model_4 = tf.keras.Sequential([
    layers.Dense(1, input_shape=[1], name="input_layer"),
    layers.Dense(128, activation="relu", name="dense_layer1"),
    layers.Dropout(.25, name="dropout1"),
    layers.Dense(64, activation="relu", name="dense_layer2"),
    layers.Dropout(.25, name="dropout2"),
    layers.Dense(32, activation="relu", name="dense_layer3"),
    layers.Dense(1, name="output_layer")
], name="my_model_4")

model_4.compile(
    loss=tf.keras.losses.mae,
    optimizer=optimizers.Adam(learning_rate=0.01),
    # Keras documents `metrics` as a list of metrics; a bare string
    # is not accepted by every Keras version
    metrics=["mae"])
# the test split doubles as validation data, so val_mae is reported per epoch
model_4.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=500)
# Epoch 500/500
# 13/13 [==============================] - 0s 5ms/step - loss: 38.3689 - mae: 38.3689 - val_loss: 143.9144 - val_mae: 143.9144

y_pred_4 = model_4.predict(X_test)

# blue dots should line up with red dots for a perfect prediction
plot_predictions(train_data=X_train,
                 train_labels=y_train,
                 test_data=X_test,
                 test_labels=y_test,
                 predictions=y_pred_4)

Tensorflow Neural Network Regression

model_5

# I am getting the best results with `model_2`
# but a 2nd dropout layer seems to reduce the performance
# let's remove the 1st dropout
# note: besides having no dropout, this model has only two hidden layers
# (128 and 64 units, no `dense_layer3`) and uses a learning rate of 0.001
# instead of the 0.01 used by `model_2`
model_5 = tf.keras.Sequential([
    layers.Dense(1, input_shape=[1], name="input_layer"),
    layers.Dense(128, activation="relu", name="dense_layer1"),
    layers.Dense(64, activation="relu", name="dense_layer2"),
    layers.Dense(1, name="output_layer")
], name="my_model_5")

model_5.compile(
    loss=tf.keras.losses.mae,
    optimizer=optimizers.Adam(learning_rate=0.001),
    # Keras documents `metrics` as a list of metrics; a bare string
    # is not accepted by every Keras version
    metrics=["mae"])
# the test split doubles as validation data, so val_mae is reported per epoch
model_5.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100)
# Epoch 100/100
# 13/13 [==============================] - 0s 4ms/step - loss: 2.1195 - mae: 2.1195 - val_loss: 2.9318 - val_mae: 2.9318

y_pred_5 = model_5.predict(X_test)

# blue dots should line up with red dots for a perfect prediction
plot_predictions(train_data=X_train,
                 train_labels=y_train,
                 test_data=X_test,
                 test_labels=y_test,
                 predictions=y_pred_5)

Tensorflow Neural Network Regression

model_6

# same as above - based on `model_2`
# but more complexity removed
# (hidden layers shrink to 64 and 32 units, still no dropout)
model_6 = tf.keras.Sequential([
    layers.Dense(1, input_shape=[1], name="input_layer"),
    layers.Dense(64, activation="relu", name="dense_layer1"),
    layers.Dense(32, activation="relu", name="dense_layer2"),
    layers.Dense(1, name="output_layer")
], name="my_model_6")

model_6.compile(
    loss=tf.keras.losses.mae,
    optimizer=optimizers.Adam(learning_rate=0.001),
    # Keras documents `metrics` as a list of metrics; a bare string
    # is not accepted by every Keras version
    metrics=["mae"])
# the test split doubles as validation data, so val_mae is reported per epoch
model_6.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100)
# Epoch 100/100
# 13/13 [==============================] - 0s 5ms/step - loss: 1.9283 - mae: 1.9283 - val_loss: 2.0062 - val_mae: 2.0062

y_pred_6 = model_6.predict(X_test)

# blue dots should line up with red dots for a perfect prediction
plot_predictions(train_data=X_train,
                 train_labels=y_train,
                 test_data=X_test,
                 test_labels=y_test,
                 predictions=y_pred_6)

Tensorflow Neural Network Regression

model_7

# same as above - based on `model_2`
# but even more complexity removed
# (hidden layers shrink to 16 and 8 units, still no dropout)
# NOTE(review): the model name below is still "my_model_6", the same name
# `model_6` uses - it looks like a copy-paste leftover. It is kept unchanged
# because the `summary()` outputs recorded further down print this name.
model_7 = tf.keras.Sequential([
    layers.Dense(1, input_shape=[1], name="input_layer"),
    layers.Dense(16, activation="relu", name="dense_layer1"),
    layers.Dense(8, activation="relu", name="dense_layer2"),
    layers.Dense(1, name="output_layer")
], name="my_model_6")

model_7.compile(
    loss=tf.keras.losses.mae,
    optimizer=optimizers.Adam(learning_rate=0.001),
    # Keras documents `metrics` as a list of metrics; a bare string
    # is not accepted by every Keras version
    metrics=["mae"])
# increased epochs 100 -> 500
# the test split doubles as validation data, so val_mae is reported per epoch
model_7.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=500)
# Epoch 500/500
# 13/13 [==============================] - 0s 5ms/step - loss: 0.6490 - mae: 0.6490 - val_loss: 0.9975 - val_mae: 0.9975

y_pred_7 = model_7.predict(X_test)

# blue dots should line up with red dots for a perfect prediction
plot_predictions(train_data=X_train,
                 train_labels=y_train,
                 test_data=X_test,
                 test_labels=y_test,
                 predictions=y_pred_7)

Tensorflow Neural Network Regression

Comparing Experiments

# helper that scores predictions with both mae and mse
def squared_and_absolute_error(y_true, y_pred):
    """Return the mean absolute and mean squared error as a `(mae, mse)` tuple.

    `y_pred` is passed through `tf.squeeze` before it is compared with
    `y_true`, so size-1 dimensions of the predictions are dropped first.
    Both results are converted with `.numpy()` before being returned.
    """
    # drop size-1 dimensions once and reuse the result for both metrics
    flat_pred = tf.squeeze(y_pred)
    abs_err = tf.metrics.mean_absolute_error(y_true, flat_pred)
    sq_err = tf.metrics.mean_squared_error(y_true, flat_pred)
    return abs_err.numpy(), sq_err.numpy()
# score every model against the test labels; each result is a (mae, mse) pair
model_1_err = squared_and_absolute_error(y_test, y_pred_1)
model_2_err = squared_and_absolute_error(y_test, y_pred_2)
model_3_err = squared_and_absolute_error(y_test, y_pred_3)
model_4_err = squared_and_absolute_error(y_test, y_pred_4)
model_5_err = squared_and_absolute_error(y_test, y_pred_5)
model_6_err = squared_and_absolute_error(y_test, y_pred_6)
model_7_err = squared_and_absolute_error(y_test, y_pred_7)

# map each model label to its error pair (insertion order = row order)
errors_by_model = {
    "model_1": model_1_err,
    "model_2": model_2_err,
    "model_3": model_3_err,
    "model_4": model_4_err,
    "model_5": model_5_err,
    "model_6": model_6_err,
    "model_7": model_7_err,
}

# one [label, mae, mse] row per model
model_err = [
    [label, abs_err, sq_err]
    for label, (abs_err, sq_err) in errors_by_model.items()
]

result_table = pd.DataFrame(model_err, columns=["model", "mae", "mse"])
# show the rows ordered by ascending mae (returns a sorted copy,
# `result_table` itself is left unsorted)
result_table.sort_values(by=["mae"])
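| model   | mae        | mse          |
|---------|------------|--------------|
| model_7 | 1.144104   | 1.818071     |
| model_6 | 1.684225   | 3.257224     |
| model_5 | 2.049126   | 5.940683     |
| model_2 | 49.432564  | 3300.268799  |
| model_4 | 100.509567 | 14063.405273 |
| model_3 | 102.297127 | 14211.152344 |
| model_1 | 121.414032 | 20278.529297 |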
# the best performing model is `model_7`
# (it has the lowest mae and mse of all rows in the result table)
# print its layer overview and parameter counts
model_7.summary()

# Model: "my_model_6"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# input_layer (Dense) (None, 1) 2

# dense_layer1 (Dense) (None, 16) 32

# dense_layer2 (Dense) (None, 8) 136

# output_layer (Dense) (None, 1) 9

# =================================================================
# Total params: 179
# Trainable params: 179
# Non-trainable params: 0
# _________________________________________________________________

Saving the Trained Models

  1. SavedModel format (default)
  2. HDF5 format (easier to transfer outside of TF)
# target locations for the two formats
path = "saved_models/SavedModel_Format"
path_hdf5 = "saved_models/hdf5_format.h5"

# write `model_7` in SavedModel format
model_7.save(path)
# write `model_7` in HDF5 format
model_7.save(path_hdf5)

Restoring a Saved Model

# restore the model that was saved in SavedModel format under `path`
loaded_SavedModel = tf.keras.models.load_model(path)
# Verify that it is the correct model
loaded_SavedModel.summary()

# Model: "my_model_6"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# input_layer (Dense) (None, 1) 2

# dense_layer1 (Dense) (None, 16) 32

# dense_layer2 (Dense) (None, 8) 136

# output_layer (Dense) (None, 1) 9

# =================================================================
# Total params: 179
# Trainable params: 179
# Non-trainable params: 0
# _________________________________________________________________

# make a prediction with the restored model
# let's find a value pair from the test dataset
X_test[66], y_test[66]
# (<tf.Tensor: shape=(), dtype=int32, numpy=-944>,
# <tf.Tensor: shape=(), dtype=int32, numpy=-934>)
# So for a feature value of `-944` we should receive a label value of `-934`
# (labels were created as feature + 10)
# run prediction
loaded_SavedModel.predict([-944])
# array([[-936.5761]], dtype=float32)
# restore the model that was saved in HDF5 format under `path_hdf5`
loaded_HDF5Model = tf.keras.models.load_model(path_hdf5)
# Verify that it is the correct model
loaded_HDF5Model.summary()

# Model: "my_model_6"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# input_layer (Dense) (None, 1) 2

# dense_layer1 (Dense) (None, 16) 32

# dense_layer2 (Dense) (None, 8) 136

# output_layer (Dense) (None, 1) 9

# =================================================================
# Total params: 179
# Trainable params: 179
# Non-trainable params: 0
# _________________________________________________________________
# run prediction
loaded_HDF5Model.predict([-944])
# array([[-936.5761]], dtype=float32)
# the result is identical to the result we get from
# the restored SavedModel format model

# predict the same input with the original and both restored models
pred_original = model_7.predict([-944])
pred_saved_model = loaded_SavedModel.predict([-944])
pred_hdf5 = loaded_HDF5Model.predict([-944])
# combine the two element-wise comparisons with `&`: a chained
# `a == b == c` calls bool() on the first comparison array, which only
# works for single-element arrays and raises for larger predictions
(pred_original == pred_saved_model) & (pred_saved_model == pred_hdf5)
# array([[ True]])
# the restored models predict the identical values