Serving your SciKit Learn Model as a Prediction API
Image Data Preprocessing
import collections
from glob import glob
import matplotlib.pyplot as plt
from matplotlib import patches
import numpy as np
import os
import pandas as pd
import pickle
import re
from scipy import ndimage
from skimage import (
    io,
    color,
    exposure,
    transform,
    feature
)
import seaborn as sns
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay
)
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
SEED = 42
Image Dataset Preparation
Get Image Dataset from Local Directory
# get list of image class folders - https://paperswithcode.com/dataset/food-101
data_dir = os.listdir('dataset/animals')
print(data_dir)
# ['bear', 'cat', 'chicken', 'cow', 'deer', 'dog', 'duck', 'eagle', 'elephant', 'human', 'lion', 'monkey', 'mouse', 'panda', 'pigeon', 'rabbit', 'sheep', 'tiger', 'wolf']
glob('./dataset/animals/{}/*.jpg'.format('wolf'))
# ['./dataset/animals/wolf/wolffrontal0001.jpg',
# './dataset/animals/wolf/wolffrontal0002.jpg',
# './dataset/animals/wolf/wolffrontal0003.jpg',
# ...
all_files = []
for subfolder in data_dir:
    all_files += glob('./dataset/animals/{}/*.jpg'.format(subfolder))
len(all_files)
# 2015
data_collection = io.ImageCollection(all_files)
data_collection.files
# ['./dataset/animals/bear/bearfrontal0001.jpg',
# './dataset/animals/bear/bearfrontal0002.jpg',
# './dataset/animals/bear/bearfrontal0003.jpg',
# './dataset/animals/bear/bearfrontal0004.jpg',
#...
Get Image Labels from Folder Structure
# use regular expression to extract folder name as label - example:
re.search(
    r'./dataset/animals/(.*?)/',
    './dataset/animals/bear/bearfrontal0001.jpg'
).group(1)
# label extracted: 'bear'
def extract_labels(location):
    label = re.search(
        r'./dataset/animals/(.*?)/', location
    ).group(1)
    return label
labels = list(map(extract_labels, data_collection.files))
list(set(labels))
# ['human',
# 'cat',
# 'lion',
# 'sheep',
# 'cow',
# 'mouse',
# 'pigeon',
# 'tiger',
# 'rabbit',
# 'elephant',
# 'deer',
# 'eagle',
# 'dog',
# 'wolf',
# 'panda',
# 'monkey',
# 'duck',
# 'chicken',
# 'bear']
Dataset Export
# materialize the lazy ImageCollection into a single numpy array
dataset_list = list(data_collection)
dataset_array = np.asarray(dataset_list)
dataset_array.shape
# (2015, 80, 80, 3)
data_dict = dict()
data_dict['description'] = '2015 80x80 RGB images of 19 classes.'
data_dict['data'] = dataset_array
data_dict['target'] = labels
data_dict['labels'] = set(labels)
label_distribution = collections.Counter(data_dict['target'])
plt.figure(figsize=(16, 5))
plt.title('Target Distribution in Dataset')
sns.countplot(
    data=data_dict,
    x='target'
)
plt.savefig('assets/Scikit_Image_Model_Deployment_01.webp', bbox_inches='tight')
# plot multiple random images with labels
ran_gen = np.random.default_rng()
plt.figure(figsize=(12, 12))
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    random_index = ran_gen.integers(low=0, high=2015, size=1)
    plt.imshow(data_dict['data'][random_index[0]])
    plt.title(data_dict['target'][random_index[0]])
    plt.axis(False)
plt.savefig('assets/Scikit_Image_Model_Deployment_02.webp', bbox_inches='tight')
# save the dataset
with open('dataset/animals.pkl', 'wb') as output:
    pickle.dump(data_dict, output)
Data Preprocessing
# load dataset pickle
dataset = pickle.load(open('dataset/animals.pkl', 'rb'))
dataset['description']
X = dataset['data']
y = dataset['target']
# train-test-split
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, stratify=y, random_state=SEED
)
print(X_train.shape, X_test.shape)
# (1612, 80, 80, 3) (403, 80, 80, 3)
Feature Extraction
Histogram of Oriented Gradients (HOG)
The HOG descriptor summarizes an image by the distribution of gradient orientations within small cells, turning each 80x80 RGB image into a fixed-length feature vector that a linear classifier can learn from.
testimg = io.imread('assets/lion.jpg')
# hog feature descriptor
feature_vector, hog_image = feature.hog(
    testimg, orientations=8, pixels_per_cell=(16, 16),
    cells_per_block=(1, 1), visualize=True, channel_axis=-1
)
# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 5))
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
ax1.axis('off')
ax1.imshow(testimg, cmap=plt.cm.gray)
ax1.set_title('Input image')
ax2.axis('off')
ax2.imshow(hog_image_rescaled, cmap=plt.cm.gray)
ax2.set_title('Histogram of Oriented Gradients')
plt.show()
class hog_transformer(BaseEstimator, TransformerMixin):
    """Scikit-learn compatible transformer that converts RGB images into HOG feature vectors."""
    def __init__(
        self,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(3, 3)
    ):
        self.orientations = orientations
        self.pixels_per_cell = pixels_per_cell
        self.cells_per_block = cells_per_block

    def fit(self, X, y=None):
        # stateless transformer - nothing to fit
        return self

    def transform(self, X, y=None):
        def local_hog(img):
            return feature.hog(
                img,
                orientations=self.orientations,
                pixels_per_cell=self.pixels_per_cell,
                cells_per_block=self.cells_per_block,
                channel_axis=-1
            )
        return [local_hog(x) for x in X]
Model Training
SGD Classifier Training Pipeline
- Feature Extraction
- Normalization
- Model Fitting
feature_extractor = hog_transformer()
scaler = StandardScaler()
model_sgd = SGDClassifier(loss='hinge', learning_rate='adaptive', eta0=0.1, early_stopping=True)
X_train_fv = feature_extractor.fit_transform(X_train)
X_test_fv = feature_extractor.transform(X_test)
X_train_fv_scaled = scaler.fit_transform(X_train_fv)
X_test_fv_scaled = scaler.transform(X_test_fv)
model_sgd.fit(X_train_fv_scaled, y_train)
Model Evaluation
y_pred = model_sgd.predict(X_test_fv_scaled)
# plot predictions
ran_gen = np.random.default_rng()
plt.figure(figsize=(12, 12))
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    random_index = ran_gen.integers(low=0, high=403, size=1)
    plt.imshow(X_test[random_index[0]])
    plt.title(y_pred[random_index[0]])
    plt.axis(False)
plt.savefig('assets/Scikit_Image_Model_Deployment_04.webp', bbox_inches='tight')
conf_mtx = ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred),
    display_labels=sorted(set(y_test))
)
conf_mtx.plot(cmap='plasma')
eval_report = classification_report(y_test, y_pred, output_dict=True)
eval_df = pd.DataFrame(eval_report)
eval_df.transpose()
| | precision | recall | f1-score | support |
|---|---|---|---|---|
bear | 0.428571 | 0.450000 | 0.439024 | 20.000000 |
cat | 0.600000 | 0.562500 | 0.580645 | 32.000000 |
chicken | 0.611111 | 0.550000 | 0.578947 | 20.000000 |
cow | 0.500000 | 0.600000 | 0.545455 | 20.000000 |
deer | 0.695652 | 0.800000 | 0.744186 | 20.000000 |
dog | 0.517241 | 0.576923 | 0.545455 | 26.000000 |
duck | 0.777778 | 0.700000 | 0.736842 | 20.000000 |
eagle | 0.466667 | 0.736842 | 0.571429 | 19.000000 |
elephant | 0.541667 | 0.650000 | 0.590909 | 20.000000 |
human | 0.826087 | 0.950000 | 0.883721 | 20.000000 |
lion | 0.375000 | 0.300000 | 0.333333 | 20.000000 |
monkey | 0.466667 | 0.350000 | 0.400000 | 20.000000 |
mouse | 0.416667 | 0.250000 | 0.312500 | 20.000000 |
panda | 0.678571 | 0.826087 | 0.745098 | 23.000000 |
pigeon | 0.631579 | 0.600000 | 0.615385 | 20.000000 |
rabbit | 0.800000 | 0.600000 | 0.685714 | 20.000000 |
sheep | 0.550000 | 0.550000 | 0.550000 | 20.000000 |
tiger | 0.800000 | 0.695652 | 0.744186 | 23.000000 |
wolf | 0.444444 | 0.400000 | 0.421053 | 20.000000 |
accuracy | 0.588089 | 0.588089 | 0.588089 | 0.588089 |
macro avg | 0.585669 | 0.586737 | 0.580204 | 403.000000 |
weighted avg | 0.587659 | 0.588089 | 0.582170 | 403.000000 |
Hyperparameter Tuning
Training Pipeline
train_pipeline = Pipeline([
    ('feature_extraction', hog_transformer(
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(3, 3))
    ),
    ('normalization', StandardScaler()),
    ('model_training', SGDClassifier(
        loss='hinge', eta0=0.1,
        learning_rate='adaptive',
        early_stopping=True)
    )
])
train_pipeline.fit(X_train, y_train)
y_pred_pipe = train_pipeline.predict(X_test)
eval_report = classification_report(y_test, y_pred_pipe, output_dict=True)
eval_df = pd.DataFrame(eval_report)
eval_df.transpose()
| | precision | recall | f1-score | support |
|---|---|---|---|---|
bear | 0.368421 | 0.350000 | 0.358974 | 20.000000 |
cat | 0.607143 | 0.531250 | 0.566667 | 32.000000 |
chicken | 0.571429 | 0.600000 | 0.585366 | 20.000000 |
cow | 0.681818 | 0.750000 | 0.714286 | 20.000000 |
deer | 0.772727 | 0.850000 | 0.809524 | 20.000000 |
dog | 0.354839 | 0.423077 | 0.385965 | 26.000000 |
duck | 0.500000 | 0.550000 | 0.523810 | 20.000000 |
eagle | 0.473684 | 0.473684 | 0.473684 | 19.000000 |
elephant | 0.444444 | 0.600000 | 0.510638 | 20.000000 |
human | 0.680000 | 0.850000 | 0.755556 | 20.000000 |
lion | 0.500000 | 0.350000 | 0.411765 | 20.000000 |
monkey | 0.500000 | 0.400000 | 0.444444 | 20.000000 |
mouse | 0.307692 | 0.200000 | 0.242424 | 20.000000 |
panda | 0.772727 | 0.739130 | 0.755556 | 23.000000 |
pigeon | 0.608696 | 0.700000 | 0.651163 | 20.000000 |
rabbit | 0.600000 | 0.600000 | 0.600000 | 20.000000 |
sheep | 0.500000 | 0.550000 | 0.523810 | 20.000000 |
tiger | 0.722222 | 0.565217 | 0.634146 | 23.000000 |
wolf | 0.789474 | 0.750000 | 0.769231 | 20.000000 |
accuracy | 0.568238 | 0.568238 | 0.568238 | 0.568238 |
macro avg | 0.566069 | 0.570124 | 0.564053 | 403.000000 |
weighted avg | 0.567078 | 0.568238 | 0.563651 | 403.000000 |
GridSearch
estimator = Pipeline([
    ('feature_extraction', hog_transformer()),
    ('normalization', StandardScaler()),
    ('model_training', SGDClassifier())
])
param_grid = [
    {
        'feature_extraction__orientations': [7, 8, 9, 10, 11],
        'feature_extraction__pixels_per_cell': [(7, 7), (8, 8), (9, 9)],
        'feature_extraction__cells_per_block': [(2, 2), (3, 3), (4, 4)],
        'model_training__loss': ['hinge', 'squared_hinge', 'perceptron'],
        'model_training__eta0': [0.001, 0.1, 1],
        'model_training__learning_rate': ['optimal', 'adaptive']
    }
]
model_grid = GridSearchCV(
    estimator,
    param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2
)
model_grid.fit(X_train, y_train)
# Fitting 3 folds for each of 810 candidates, totalling 2430 fits
# time for a coffee break :)
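Beyond the single best candidate, the complete cross-validation results can be inspected once the search has finished. A minimal sketch, assuming the fitted model_grid from above (the columns are standard GridSearchCV.cv_results_ keys):
# list the ten best parameter combinations by mean CV accuracy
cv_results = pd.DataFrame(model_grid.cv_results_)
cv_results.sort_values('rank_test_score')[
    ['params', 'mean_test_score', 'std_test_score', 'mean_fit_time']
].head(10)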
Best Model Evaluation
model_grid.best_params_
model_grid.best_score_
model_best = model_grid.best_estimator_
y_pred_best = model_best.predict(X_test)
eval_report_best = classification_report(y_test, y_pred_best, output_dict=True)
eval_best_df = pd.DataFrame(eval_report_best)
eval_best_df.transpose()
| | precision | recall | f1-score | support |
|---|---|---|---|---|
bear | 0.680000 | 0.850000 | 0.755556 | 20.000000 |
cat | 0.750000 | 0.656250 | 0.700000 | 32.000000 |
chicken | 0.809524 | 0.850000 | 0.829268 | 20.000000 |
cow | 0.809524 | 0.850000 | 0.829268 | 20.000000 |
deer | 0.869565 | 1.000000 | 0.930233 | 20.000000 |
dog | 0.607143 | 0.653846 | 0.629630 | 26.000000 |
duck | 0.789474 | 0.750000 | 0.769231 | 20.000000 |
eagle | 0.647059 | 0.578947 | 0.611111 | 19.000000 |
elephant | 0.782609 | 0.900000 | 0.837209 | 20.000000 |
human | 1.000000 | 0.900000 | 0.947368 | 20.000000 |
lion | 0.692308 | 0.450000 | 0.545455 | 20.000000 |
monkey | 0.631579 | 0.600000 | 0.615385 | 20.000000 |
mouse | 0.684211 | 0.650000 | 0.666667 | 20.000000 |
panda | 0.916667 | 0.956522 | 0.936170 | 23.000000 |
pigeon | 0.666667 | 0.800000 | 0.727273 | 20.000000 |
rabbit | 0.866667 | 0.650000 | 0.742857 | 20.000000 |
sheep | 0.750000 | 0.750000 | 0.750000 | 20.000000 |
tiger | 0.800000 | 0.869565 | 0.833333 | 23.000000 |
wolf | 0.809524 | 0.850000 | 0.829268 | 20.000000 |
accuracy | 0.764268 | 0.764268 | 0.764268 | 0.764268 |
macro avg | 0.766448 | 0.766586 | 0.762383 | 403.000000 |
weighted avg | 0.765251 | 0.764268 | 0.760746 | 403.000000 |
conf_mtx_best = ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred_best),
    display_labels=sorted(set(y_test))
)
conf_mtx_best.plot(cmap='plasma')
# plot predictions
ran_gen = np.random.default_rng()
plt.figure(figsize=(12, 12))
for i in range(12):
    ax = plt.subplot(4, 4, i+1)
    random_index = ran_gen.integers(low=0, high=403, size=1)
    plt.imshow(X_test[random_index[0]])
    plt.title(y_pred_best[random_index[0]])
    plt.axis(False)
plt.savefig('assets/Scikit_Image_Model_Deployment_07.webp', bbox_inches='tight')
Trained Model Export
# save the best model found by the grid search
with open('model/animal_model_best.pkl', 'wb') as output:
    pickle.dump(model_best, output)
Deployment Model
Model Training
best_params = {
    'feature_extraction__cells_per_block': (2, 2),
    'feature_extraction__orientations': 11,
    'feature_extraction__pixels_per_cell': (8, 8),
    'model_training__eta0': 0.1,
    'model_training__learning_rate': 'optimal',
    'model_training__loss': 'perceptron'
}
feature_extractor_pipe = make_pipeline(
    hog_transformer(
        orientations=11,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2)
    )
)
feature_vectors = feature_extractor_pipe.fit_transform(X_train)
normalizer = StandardScaler()
feature_normed = normalizer.fit_transform(feature_vectors)
classifier = SGDClassifier(
    loss='perceptron', eta0=0.1,
    learning_rate='optimal',
    early_stopping=True
)
classifier.fit(feature_normed, y_train)
Model Evaluation
feature_vectors_test = feature_extractor_pipe.transform(X_test)
feature_normed_test = normalizer.transform(feature_vectors_test)
y_pred_deploy = classifier.predict(feature_normed_test)
eval_report_best = classification_report(y_test, y_pred_deploy, output_dict=True)
eval_best_df = pd.DataFrame(eval_report_best)
eval_best_df.transpose()
| | precision | recall | f1-score | support |
|---|---|---|---|---|
bear | 0.700000 | 0.700000 | 0.700000 | 20.00000 |
cat | 0.862069 | 0.781250 | 0.819672 | 32.00000 |
chicken | 0.937500 | 0.750000 | 0.833333 | 20.00000 |
cow | 0.625000 | 0.750000 | 0.681818 | 20.00000 |
deer | 0.809524 | 0.850000 | 0.829268 | 20.00000 |
dog | 0.588235 | 0.769231 | 0.666667 | 26.00000 |
duck | 0.750000 | 0.750000 | 0.750000 | 20.00000 |
eagle | 0.590909 | 0.684211 | 0.634146 | 19.00000 |
elephant | 0.761905 | 0.800000 | 0.780488 | 20.00000 |
human | 1.000000 | 0.950000 | 0.974359 | 20.00000 |
lion | 0.647059 | 0.550000 | 0.594595 | 20.00000 |
monkey | 0.875000 | 0.700000 | 0.777778 | 20.00000 |
mouse | 0.647059 | 0.550000 | 0.594595 | 20.00000 |
panda | 0.956522 | 0.956522 | 0.956522 | 23.00000 |
pigeon | 0.666667 | 0.800000 | 0.727273 | 20.00000 |
rabbit | 0.769231 | 0.500000 | 0.606061 | 20.00000 |
sheep | 0.619048 | 0.650000 | 0.634146 | 20.00000 |
tiger | 0.800000 | 0.869565 | 0.833333 | 23.00000 |
wolf | 0.761905 | 0.800000 | 0.780488 | 20.00000 |
accuracy | 0.749380 | 0.749380 | 0.749380 | 0.74938 |
macro avg | 0.756191 | 0.745304 | 0.746028 | 403.00000 |
weighted avg | 0.759071 | 0.749380 | 0.749534 | 403.00000 |
Model Export
# save the model
with open('model/animal_model_deployment.pkl', 'wb') as output:
    pickle.dump(classifier, output)
# save the fitted normalizer
with open('model/animal_model_deployment_scaler.pkl', 'wb') as output:
    pickle.dump(normalizer, output)
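With the classifier and the fitted scaler exported, a prediction service only needs to reload both pickles and run an incoming image through the same steps as in training: HOG feature extraction, scaling, classification. A minimal sketch of that inference path, assuming the hog_transformer class from above is importable and the input is already an 80x80 RGB array (predict_image is an illustrative helper, not part of the original code):
# reload the exported artifacts
with open('model/animal_model_deployment.pkl', 'rb') as model_file:
    deployed_model = pickle.load(model_file)
with open('model/animal_model_deployment_scaler.pkl', 'rb') as scaler_file:
    deployed_scaler = pickle.load(scaler_file)
# feature extractor with the same parameters used for training
deployed_hog = hog_transformer(
    orientations=11,
    pixels_per_cell=(8, 8),
    cells_per_block=(2, 2)
)
def predict_image(image_rgb_80x80):
    features = deployed_hog.transform([image_rgb_80x80])  # HOG feature vector
    features = deployed_scaler.transform(features)        # apply training-time scaling
    return deployed_model.predict(features)[0]            # predicted class label
# example: classify one image from the test split
print(predict_image(X_test[0]))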