Skip to content
Run in Google Colab View notebook on GitHub

Using Albumentations with Tensorflow

!pip install -q -U albumentations
!echo "$(pip freeze | grep albumentations) is successfully installed"
albumentations==0.4.6 is successfully installed

  • We'll be using an example from tensorflow_datasets.
! pip install --upgrade tensorflow_datasets

Run the example

# necessary imports
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
from functools import partial
from albumentations import (
    Compose, RandomBrightness, JpegCompression, HueSaturationValue, RandomContrast, HorizontalFlip,
    Rotate
)
AUTOTUNE = tf.data.experimental.AUTOTUNE
tfds.__version__
'3.2.1'
# load in the tf_flowers dataset
data, info= tfds.load(name="tf_flowers", split="train", as_supervised=True, with_info=True)
data
<PrefetchDataset shapes: ((None, None, 3), ()), types: (tf.uint8, tf.int64)>
info
tfds.core.DatasetInfo(
    name='tf_flowers',
    version=3.0.1,
    description='A large set of images of flowers',
    homepage='https://www.tensorflow.org/tutorials/load_data/images',
    features=FeaturesDict({
        'image': Image(shape=(None, None, 3), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=5),
    }),
    total_num_examples=3670,
    splits={
        'train': 3670,
    },
    supervised_keys=('image', 'label'),
    citation="""@ONLINE {tfflowers,
    author = "The TensorFlow Team",
    title = "Flowers",
    month = "jan",
    year = "2019",
    url = "http://download.tensorflow.org/example_images/flower_photos.tgz" }""",
    redistribution_info=,
)

An Example Pipeline Using tf.image

Process Data

def process_image(image, label, img_size):
    """Prepare one (image, label) example using only tf.image ops.

    The image is cast/normalized to float32, randomly flipped left-right
    and resized to ``img_size`` x ``img_size``. The label is returned
    unchanged.
    """
    target_size = [img_size, img_size]
    # cast and normalize first so the later ops work on float32 data
    as_float = tf.image.convert_image_dtype(image, tf.float32)
    # the only augmentation in this pipeline: a random horizontal flip
    flipped = tf.image.random_flip_left_right(as_float)
    return tf.image.resize(flipped, target_size), label

ds_tf = data.map(partial(process_image, img_size=120), num_parallel_calls=AUTOTUNE).batch(30).prefetch(AUTOTUNE)
ds_tf
<PrefetchDataset shapes: ((None, 120, 120, 3), (None,)), types: (tf.float32, tf.int64)>

View images from the dataset

def view_image(ds):
    """Plot up to 20 images from the first batch of ``ds`` on a 4x5 grid.

    Args:
        ds: iterable yielding ``(image, label)`` batches whose two members
            both expose ``.numpy()``.
    """
    image, label = next(iter(ds))  # extract 1 batch from the dataset
    image = image.numpy()
    label = label.numpy()

    # A batch can hold fewer than 20 images (small batch size or a short
    # final batch); never index past its end.
    n_shown = min(20, len(image))

    fig = plt.figure(figsize=(22, 22))
    for i in range(n_shown):
        ax = fig.add_subplot(4, 5, i + 1, xticks=[], yticks=[])
        ax.imshow(image[i])
        ax.set_title(f"Label: {label[i]}")
view_image(ds_tf)

Using tf.image is a very efficient way to create a pipeline, but the disadvantage is that tf.image offers only a limited number of augmentations for our input data. One way to solve this issue is to use the tf.keras ImageDataGenerator class, but albumentations is faster.

An Example Pipeline using albumentations

To integrate albumentations into our TensorFlow pipeline, we can create two pieces:
- A pipeline that applies the augmentations.
- A function that calls this pipeline and passes our data through it.
We can then wrap the second function with tf.numpy_function.

Create Pipeline to Process Data

# Instantiate augments
# we can apply as many augments as we want and adjust the values accordingly
# here I have chosen the augments and their arguments at random
# `transforms` is a module-level pipeline object; aug_fn calls it with the
# image passed as the `image` keyword and reads the result's "image" entry.
# NOTE(review): `p` is presumably the per-call probability of applying the
# transform - confirm against the albumentations docs.
transforms = Compose([
            Rotate(limit=40),
            RandomBrightness(limit=0.1),
            JpegCompression(quality_lower=85, quality_upper=100, p=0.5),
            HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
            RandomContrast(limit=0.2, p=0.5),
            HorizontalFlip(),
        ])
def aug_fn(image, img_size):
    """Augment a single image with ``transforms`` and resize it.

    Runs the module-level albumentations pipeline on ``image``, divides the
    result by 255.0 and casts it to float32, then resizes it to
    ``img_size`` x ``img_size``.
    """
    # the pipeline takes the image by keyword and returns a dict-like result
    augmented = transforms(image=image)["image"]
    scaled = tf.cast(augmented / 255.0, tf.float32)
    return tf.image.resize(scaled, size=[img_size, img_size])
def process_data(image, label, img_size):
    """Run ``aug_fn`` on ``image`` through tf.numpy_function.

    Returns the augmented float32 image together with the unchanged label.
    """
    augmented = tf.numpy_function(
        func=aug_fn,
        inp=[image, img_size],
        Tout=tf.float32,
    )
    return augmented, label
# create dataset
ds_alb = data.map(partial(process_data, img_size=120),
                  num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)
ds_alb
<PrefetchDataset shapes: (<unknown>, ()), types: (tf.float32, tf.int64)>

Restoring dataset shapes.

The dataset loses its shape information after applying tf.numpy_function, so restoring the shapes is necessary for the Sequential model and when inheriting from the Model class.

def set_shapes(img, label, img_shape=(120,120,3)):
    """Attach static shapes to one (img, label) pair and return the pair.

    ``img`` is given ``img_shape`` and ``label`` the scalar shape ``[]``.
    Both are updated in place via their ``set_shape`` method.
    """
    for tensor, static_shape in ((img, img_shape), (label, [])):
        tensor.set_shape(static_shape)
    return img, label
ds_alb = ds_alb.map(set_shapes, num_parallel_calls=AUTOTUNE).batch(32).prefetch(AUTOTUNE)
ds_alb
<PrefetchDataset shapes: ((None, 120, 120, 3), (None,)), types: (tf.float32, tf.int64)>

View images from the dataset

view_image(ds_alb)

We can then pass this dataset to our model and call fit on the model.


Note:

Some APIs of tensorflow.keras.Model might not work if you don't map the dataset with the set_shapes function.

What works without setting shapes :

from tensorflow.keras import models, layers
from tensorflow import keras

# Running the Model in eager mode using Sequential API

def create_model(input_shape):
    """Build a small CNN classifier ending in a 5-unit softmax layer.

    Three Conv2D layers (32, 64, 64 filters) with max-pooling after the
    first two, followed by Flatten, Dense(64) and Dense(5).
    """
    feature_layers = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
    ]
    head_layers = [
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(5, activation='softmax'),
    ]
    return models.Sequential(feature_layers + head_layers)

model = create_model((120,120,3))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy', run_eagerly=True)
model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 28s 246ms/step - loss: 1.4670 - accuracy: 0.3545
Epoch 2/2
115/115 [==============================] - 25s 216ms/step - loss: 1.1873 - accuracy: 0.5166
<tensorflow.python.keras.callbacks.History at 0x7fb68ee2c400>
# Functional API

# Build the same small CNN with the functional API.
# The input tensor is named `inputs` so the builtin `input()` is not shadowed
# for the rest of the session.
inputs = keras.Input(shape=(120, 120, 3))
x = keras.layers.Conv2D(32, (3, 3), activation="relu")(inputs)
x = keras.layers.MaxPooling2D((2, 2))(x)
x = keras.layers.Conv2D(64, (3, 3), activation='relu')(x)
x = keras.layers.MaxPooling2D((2, 2))(x)
x = keras.layers.Conv2D(64, (3, 3), activation='relu')(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(64, activation='relu')(x)
x = keras.layers.Dense(5, activation='softmax')(x)

model = keras.Model(inputs=inputs, outputs=x)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')
model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 21s 186ms/step - loss: 1.4250 - accuracy: 0.3943
Epoch 2/2
115/115 [==============================] - 22s 189ms/step - loss: 1.1752 - accuracy: 0.5256
<tensorflow.python.keras.callbacks.History at 0x7fb68e5eac88>
# Transfer Learning [freeze base model layers]: Sequential API

# ResNet50 without its top (include_top=False), loaded with "imagenet" weights
# and sized for the 120x120x3 images used throughout this notebook.
base_model = keras.applications.ResNet50(include_top=False, input_shape=(120, 120, 3), weights="imagenet")
# Freeze the base model so only the layers added below are trained.
base_model.trainable = False

# Stack a small head on top of the base model: 1x1 conv, dropout, then a
# dense classifier ending in a 5-unit softmax.
model = keras.models.Sequential([
        base_model,
        keras.layers.Conv2D(32, (1, 1), activation="relu"),
        keras.layers.Dropout(0.2),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(5, activation='softmax'),
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')
model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 29s 250ms/step - loss: 1.5457 - accuracy: 0.3052
Epoch 2/2
115/115 [==============================] - 27s 238ms/step - loss: 1.4697 - accuracy: 0.3638
<tensorflow.python.keras.callbacks.History at 0x7fb68f0386d8>
# Transfer Learning [unfreeze all layers]: Sequential API

# ResNet50 without its top (include_top=False), loaded with "imagenet" weights
# and sized for the 120x120x3 images used throughout this notebook.
base_model = keras.applications.ResNet50(include_top=False, input_shape=(120, 120, 3), weights="imagenet")
# Leave the base model trainable so its weights are updated along with the head.
base_model.trainable = True

# Head on top of the base model: 1x1 conv, then a dense classifier ending in
# a 5-unit softmax (no dropout in this variant).
model = keras.models.Sequential([
        base_model,
        keras.layers.Conv2D(32, (1, 1), activation="relu"),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(5, activation='softmax'),
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')
model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 46s 399ms/step - loss: 1.2163 - accuracy: 0.5708
Epoch 2/2
115/115 [==============================] - 45s 395ms/step - loss: 0.8039 - accuracy: 0.7204
<tensorflow.python.keras.callbacks.History at 0x7fb68b1a9f28>
# Transfer Learning [freeze all layers of feature extractor]: Functional API

base_model = keras.applications.ResNet50(include_top=False, input_shape=(120, 120, 3), weights="imagenet")
base_model.trainable = False

# Functional-API head on top of `base_model`.
# The input tensor is named `inputs` so the builtin `input()` is not shadowed
# for the rest of the session.
inputs = keras.Input(shape=(120, 120, 3))
# training=False is passed explicitly to the base model call.
x = base_model(inputs, training=False)
x = keras.layers.Conv2D(32, (1, 1), activation="relu")(x)
x = keras.layers.Dropout(0.2)(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(64, activation='relu')(x)
x = keras.layers.Dense(5, activation='softmax')(x)

model = keras.Model(inputs=inputs, outputs=x)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')
model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 30s 261ms/step - loss: 1.5425 - accuracy: 0.3060
Epoch 2/2
115/115 [==============================] - 30s 258ms/step - loss: 1.4852 - accuracy: 0.3591
<tensorflow.python.keras.callbacks.History at 0x7fb688e2bb00>
# Transfer Learning [freeze all layers of feature extractor]: Subclass API

base_model = keras.applications.ResNet50(include_top=False, input_shape=(120, 120, 3), weights="imagenet")
base_model.trainable = False

class MyModel(keras.Model):
    """Classifier head stacked on a supplied base model.

    The base model's output is flattened, passed through a 64-unit ReLU
    layer and finally a 5-unit softmax layer.
    """

    def __init__(self, base_model):
        super().__init__()
        self.base = base_model
        self.layer_1 = keras.layers.Flatten()
        self.layer_2 = keras.layers.Dense(64, activation='relu')
        self.layer_3 = keras.layers.Dense(5, activation='softmax')

    @tf.function
    def call(self, xb):
        """Forward pass: base model -> flatten -> dense -> softmax."""
        features = self.base(xb)
        flattened = self.layer_1(features)
        hidden = self.layer_2(flattened)
        return self.layer_3(hidden)


model = MyModel(base_model=base_model)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')

model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 30s 257ms/step - loss: 1.5624 - accuracy: 0.3281
Epoch 2/2
115/115 [==============================] - 29s 256ms/step - loss: 1.4372 - accuracy: 0.3970
<tensorflow.python.keras.callbacks.History at 0x7fb68778d080>
# Transfer Learning using [unfreeze all layers of feature extractor]: Subclass API 

base_model = keras.applications.ResNet50(include_top=False, input_shape=(120, 120, 3), weights="imagenet")
base_model.trainable = True

class MyModel(keras.Model):
    """Subclassed model: a supplied base model followed by a dense head.

    Head layers are Flatten, Dense(64, relu) and Dense(5, softmax).
    """

    def __init__(self, base_model):
        super().__init__()
        self.base = base_model
        self.layer_1 = keras.layers.Flatten()
        self.layer_2 = keras.layers.Dense(64, activation='relu')
        self.layer_3 = keras.layers.Dense(5, activation='softmax')

    @tf.function
    def call(self, xb):
        """Feed ``xb`` through the base model and then each head layer in order."""
        out = xb
        for stage in (self.base, self.layer_1, self.layer_2, self.layer_3):
            out = stage(out)
        return out


model = MyModel(base_model=base_model)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')

model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 46s 396ms/step - loss: 1.7472 - accuracy: 0.5422
Epoch 2/2
115/115 [==============================] - 45s 395ms/step - loss: 1.4129 - accuracy: 0.5714
<tensorflow.python.keras.callbacks.History at 0x7fb685e6da20>

What works only if you set the shapes of the dataset :

# Using Sequential API without transfer learning and without eager execution

def create_model(input_shape):
    """Return a Sequential CNN for ``input_shape`` images with 5 output classes.

    Layers are added one at a time, in order: Conv2D(32), MaxPooling2D,
    Conv2D(64), MaxPooling2D, Conv2D(64), Flatten, Dense(64), Dense(5).
    """
    net = models.Sequential()
    for layer in (
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(5, activation='softmax'),
    ):
        net.add(layer)
    return net

model = create_model((120,120,3))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')
model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 22s 192ms/step - loss: 1.4568 - accuracy: 0.3752
Epoch 2/2
115/115 [==============================] - 22s 194ms/step - loss: 1.1913 - accuracy: 0.5082
<tensorflow.python.keras.callbacks.History at 0x7fb6851346d8>
# Using Subclass API without transfer learning and without eager execution

class MyModel(keras.Model):
    """Small CNN classifier written with the Keras subclass API.

    Three Conv2D layers (32, 64, 64 filters) with max-pooling after the
    first two, then Flatten, Dense(64, relu) and Dense(5, softmax).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = keras.layers.Conv2D(32, (3, 3), activation='relu')
        self.pool1 = keras.layers.MaxPooling2D((2, 2))
        self.conv2 = keras.layers.Conv2D(64, (3, 3), activation='relu')
        self.pool2 = keras.layers.MaxPooling2D((2, 2))
        self.conv3 = keras.layers.Conv2D(64, (3, 3), activation='relu')
        self.flat = keras.layers.Flatten()
        self.dense1 = keras.layers.Dense(64, activation='relu')
        self.dense2 = keras.layers.Dense(5, activation='softmax')

    def call(self, xb):
        """Apply every layer to ``xb`` in the order it was declared."""
        pipeline = (
            self.conv1,
            self.pool1,
            self.conv2,
            self.pool2,
            self.conv3,
            self.flat,
            self.dense1,
            self.dense2,
        )
        out = xb
        for layer in pipeline:
            out = layer(out)
        return out


model = MyModel()

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics='accuracy')

model.fit(ds_alb, epochs=2)
Epoch 1/2
115/115 [==============================] - 22s 194ms/step - loss: 1.4170 - accuracy: 0.3774
Epoch 2/2
115/115 [==============================] - 22s 192ms/step - loss: 1.1577 - accuracy: 0.5371
<tensorflow.python.keras.callbacks.History at 0x7fb6848369b0>