# TF Image Classifier
## Image Dataset Preparation
from glob import glob
import matplotlib.pyplot as plt
import os
from skimage import (
io,
color,
exposure,
transform,
feature
)
from sklearn.model_selection import train_test_split
import splitfolders # pip install split-folders
# Seed passed to splitfolders.ratio for the train/test split of the dataset.
SEED = 42
## Get Image Dataset from Local Directory
# Root folder of the flower dataset; it holds one sub-folder per flower class.
# Defined once so the directory listing and the glob patterns cannot drift apart.
dataset_root = '../dataset/Flower_Dataset/complete'
jpg_pattern = dataset_root + '/{}/*.jpg'

# Every entry of the root folder is treated as a class (sub-folder) name.
data_dir = os.listdir(dataset_root)
print(data_dir)
# ['Gladiolus', 'Adenium', 'Alpinia_Purpurata', 'Alstroemeria', 'Amaryllis', 'Anthurium_Andraeanum', 'Antirrhinum', 'Aquilegia', 'Billbergia_Pyramidalis', 'Cattleya', 'Cirsium', 'Coccinia_Grandis', 'Crocus', 'Cyclamen', 'Dahlia', 'Datura_Metel', 'Dianthus_Barbatus', 'Digitalis', 'Echinacea_Purpurea', 'Echinops_Bannaticus', 'Fritillaria_Meleagris', 'Gaura', 'Gazania', 'Gerbera', 'Guzmania', 'Helianthus_Annuus', 'Iris_Pseudacorus', 'Leucanthemum', 'Malvaceae', 'Narcissus_Pseudonarcissus', 'Nerine', 'Nymphaea_Tetragona', 'Paphiopedilum', 'Passiflora', 'Pelargonium', 'Petunia', 'Platycodon_Grandiflorus', 'Plumeria', 'Poinsettia', 'Primula', 'Protea_Cynaroides', 'Rose', 'Rudbeckia', 'Strelitzia_Reginae', 'Tropaeolum_Majus', 'Tussilago', 'Viola', 'Zantedeschia_Aethiopica']

# Sanity check: list the JPEG files of a single class.
glob(jpg_pattern.format('Viola'))
# ['../dataset/Flower_Dataset/complete/Viola/Viola_185.jpg',
# '../dataset/Flower_Dataset/complete/Viola/Viola_186.jpg',
# '../dataset/Flower_Dataset/complete/Viola/Viola_187.jpg',
# ...

# Collect the JPEG paths of all classes into one flat list.
all_files = []
for subfolder in data_dir:
    all_files.extend(glob(jpg_pattern.format(subfolder)))
len(all_files)
# 12278

# Wrap the gathered paths in a scikit-image ImageCollection; its `.files`
# attribute exposes the underlying path list used by the later steps.
data_collection = io.ImageCollection(all_files)
data_collection.files
len(data_collection)
## Resize to (224, 224, 3)
# Resize every dataset image to a fixed (224, 224, 3) shape and overwrite the
# original file in place.
#
# The file paths are iterated directly. Looping over the ImageCollection
# itself would decode each image once just to discard it, and would need a
# separate counter to look the path up again.
# NOTE(review): images that are not 3-channel (grayscale, RGBA) are forced to
# three channels by the resize target shape -- confirm this is acceptable.
target_shape = (224, 224, 3)
for image_path in data_collection.files:
    # print(image_path)  # debug file read errors
    img_ori = io.imread(image_path, as_gray=False)
    img_thumb = transform.resize(img_ori, output_shape=target_shape, anti_aliasing=True)
    plt.imsave(image_path, img_thumb)
## Train Test Split
# Split the images under `input_folder` into `output_folder` using a
# 75 % / 25 % ratio, seeded with SEED.
input_folder = "../dataset/Flower_Dataset/complete"
output_folder = "../dataset/Flower_Dataset/split"
splitfolders.ratio(
    input_folder,
    output=output_folder,
    seed=SEED,
    ratio=(0.75, 0.25),
)