
Vision transformer binary classifier is only predicting one class

I wrote code for a vision transformer that classifies mammograms as benign or malignant. After training for 30 epochs, however, the model predicts only one class (benign): all of the final predictions for the test images fall in the range 0.47 to 0.49, i.e. below the 0.5 decision threshold.
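
For reference, the spread of the predictions can be checked with a minimal sketch like this (A is the array returned by predict() further down):

print(A.min(), A.max())     # 0.47 ... 0.49
print(np.unique(A > 0.5))   # [False] -- every image falls below the 0.5 threshold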

The code:
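
The snippet relies on variables defined elsewhere (DF_TRAIN, DF_TEST, TRAIN_PATH, TEST_PATH, save_path, classes_dict) and on a set of hyperparameters. Illustrative hyperparameter values, assumed here for completeness (not necessarily the exact ones I used):

IMAGE_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 30
input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
patch_size = 16
num_patches = (IMAGE_SIZE // patch_size) ** 2   # 196
projection_dim = 64
num_heads = 4
transformer_units = [projection_dim * 2, projection_dim]
transformer_layers = 8
mlp_head_units = [2048, 1024]
num_classes = 1            # one sigmoid unit for binary classification
learning_rate = 1e-3
weight_decay = 1e-4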

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import layers

datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255,
                                                          samplewise_center = True,
                                                          samplewise_std_normalization = True,
                                                          validation_split = 0.1,
                                                          rotation_range=180,
                                                          shear_range=15,
                                                          zoom_range=0.2,
                                                          width_shift_range=0.2,
                                                          height_shift_range=0.2,
                                                          horizontal_flip=True,
                                                          vertical_flip=True,
                                                          fill_mode='reflect')


train_gen = datagen.flow_from_dataframe(dataframe = DF_TRAIN,
                                        directory = TRAIN_PATH,
                                        x_col = 'image_file_path',
                                        y_col = 'pathology',
                                        subset = 'training',
                                        batch_size = BATCH_SIZE,
                                        # seed = 1,
                                        color_mode = 'rgb',
                                        shuffle = True,
                                        class_mode = 'binary',
                                        target_size = (IMAGE_SIZE, IMAGE_SIZE))


valid_gen = datagen.flow_from_dataframe(dataframe = DF_TRAIN,
                                        directory = TRAIN_PATH,
                                        x_col = 'image_file_path',
                                        y_col = 'pathology',
                                        subset = 'validation',
                                        batch_size = BATCH_SIZE,
                                        # seed = 1,
                                        color_mode = 'rgb',
                                        shuffle = False,
                                        class_mode = 'binary',
                                        target_size = (IMAGE_SIZE, IMAGE_SIZE))


test_gen = datagen.flow_from_dataframe(dataframe = DF_TEST,
                                        directory = TEST_PATH,
                                        x_col = 'image_file_path',
                                        y_col = 'pathology',
                                        # subset = 'validation',
                                        batch_size = BATCH_SIZE,
                                        # seed = 1,
                                        color_mode = 'rgb',
                                        shuffle = False,
                                        class_mode = 'binary',
                                        target_size = (IMAGE_SIZE, IMAGE_SIZE))
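
Note that test_gen reuses the augmenting datagen above, so the random rotations, shifts and flips are applied to the test images at prediction time as well. A rescale-and-normalize-only generator for evaluation would look like this sketch (test_datagen is a name introduced here for illustration):

test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255,
                                                               samplewise_center = True,
                                                               samplewise_std_normalization = True)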

def mlp(x, hidden_units, dropout_rate):
    for units in hidden_units:
        x = layers.Dense(units, activation=tf.nn.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)
    return x


class Patches(layers.Layer):
    def __init__(self, patch_size):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'patch_size': self.patch_size,
        })
        return config
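
As a quick shape check (assuming 224x224 RGB inputs and patch_size = 16), Patches splits each image into 14 * 14 = 196 patches of 16 * 16 * 3 = 768 values:

dummy = tf.zeros((2, 224, 224, 3))
print(Patches(16)(dummy).shape)   # (2, 196, 768)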


class PatchEncoder(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'num_patches': self.num_patches,
            # Store the constructor argument, not the sub-layer objects;
            # layers are not serializable inside a config.
            'projection_dim': self.projection.units,
        })
        return config
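
Continuing the shape check (with the assumed num_patches = 196 and projection_dim = 64), PatchEncoder projects each patch to projection_dim and adds a learned position embedding:

encoded = PatchEncoder(196, 64)(Patches(16)(dummy))
print(encoded.shape)   # (2, 196, 64)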


def create_vit_classifier():
    inputs = layers.Input(shape=input_shape)
    # Create patches.
    patches = Patches(patch_size)(inputs)
    # Encode patches.
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=projection_dim, dropout=0.1)(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, encoded_patches])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        # Skip connection 2.
        encoded_patches = layers.Add()([x3, x2])

    # Flatten to a [batch_size, num_patches * projection_dim] tensor.
    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    # Add MLP.
    features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    # Classify outputs. Note: with a sigmoid activation these are
    # probabilities, not logits.
    outputs = layers.Dense(num_classes, activation="sigmoid")(features)
    # Create the Keras model.
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

def run_experiment(model):
    optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate, weight_decay=weight_decay)

    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=['accuracy'])

    STEP_SIZE_TRAIN = train_gen.n // train_gen.batch_size
    STEP_SIZE_VALID = valid_gen.n // valid_gen.batch_size
    print(STEP_SIZE_TRAIN, STEP_SIZE_VALID)

    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy',
                                                     factor=0.2,
                                                     patience=2,
                                                     verbose=1,
                                                     min_delta=1e-4,
                                                     min_lr=1e-6,
                                                     mode='max')


    checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath='./model_3.hdf5',
                                                      monitor='val_accuracy',
                                                      verbose=1,
                                                      save_best_only=True,
                                                      save_weights_only=True,
                                                      mode='max')

    callbacks = [reduce_lr, checkpointer]

    history = model.fit(x=train_gen,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=valid_gen,
                        validation_steps=STEP_SIZE_VALID,
                        epochs=EPOCHS,
                        callbacks=callbacks,
                        verbose=1)

    model.save(f'{save_path}/model_3.h5')

    return history
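
Note: the classification head uses activation="sigmoid", while the loss here is constructed with from_logits=True, which expects raw (pre-sigmoid) outputs. Consistent pairings would be either of these sketches:

# Option 1: raw logits + from_logits=True
outputs = layers.Dense(num_classes)(features)   # no activation
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)

# Option 2: sigmoid probabilities + from_logits=False
outputs = layers.Dense(num_classes, activation="sigmoid")(features)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)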


vit_classifier = create_vit_classifier()
history = run_experiment(vit_classifier)

vit_classifier.load_weights(f'{save_path}/model_3.h5')  # loads the final saved weights, not the best checkpoint written to ./model_3.hdf5

A = vit_classifier.predict(test_gen, steps = int(np.ceil(test_gen.n / test_gen.batch_size)))  # ceil, so no duplicate extra batch when n divides evenly
predicted_classes = np.where(A > 0.5, 1, 0)
true_classes = test_gen.classes
class_labels = list(test_gen.class_indices.keys())

results = pd.DataFrame(list(zip(test_gen.filenames, true_classes, predicted_classes)),
               columns =['Image name', 'True class', 'Predicted class'])
results = results.replace({"True class": classes_dict})
results = results.replace({"Predicted class": classes_dict})
prob = pd.DataFrame(A,  columns =['Predicted probability'])

result_df = pd.concat([results, prob], axis=1)
result_df['Predicted probability'] = pd.Series(["{0:.1f}".format(val * 100) for val in result_df['Predicted probability']], index=result_df.index)

results_csv = f'{save_path}/results_3.csv'
with open(results_csv, mode='w') as f:
    result_df.to_csv(f)
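
The confusion matrix below was computed along these lines (a sketch; the exact call is not shown above, scikit-learn assumed):

from sklearn.metrics import confusion_matrix
print(confusion_matrix(true_classes, predicted_classes.ravel()))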

Confusion matrix:

[[428   0]
 [276   0]]

Performance metrics:

(screenshots of the metric plots, omitted here)

Please help me figure out how to rectify this problem.

Tags: python, keras, deep-learning, classification, transformer-model
