1 year ago
#384311
James Albert
Vision transformer binary classifier is only predicting one class
I wrote code for a vision transformer (ViT) to classify mammograms as benign or malignant. After training for 30 epochs, however, the model predicts only one class (benign): all final predictions on the test images fall in the range 0.47 to 0.49.
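For reference, the collapsed output range can be confirmed directly (a minimal sketch; vit_classifier, test_gen, and DF_TRAIN are defined in the code below, and 'pathology' is the label column the generators use):

import numpy as np

# Sketch: confirm the collapsed output range and check the label balance.
probs = vit_classifier.predict(test_gen)         # shape (n_samples, 1)
print(probs.min(), probs.max(), probs.mean())    # here: everything in ~0.47-0.49
print(np.unique(np.where(probs > 0.5, 1, 0)))    # here: only class 0 (benign)
print(DF_TRAIN['pathology'].value_counts())      # check for class imbalance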
The code:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import layers

datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    samplewise_center=True,
    samplewise_std_normalization=True,
    validation_split=0.1,
    rotation_range=180,
    shear_range=15,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='reflect')
train_gen = datagen.flow_from_dataframe(dataframe=DF_TRAIN,
                                        directory=TRAIN_PATH,
                                        x_col='image_file_path',
                                        y_col='pathology',
                                        subset='training',
                                        batch_size=BATCH_SIZE,
                                        # seed=1,
                                        color_mode='rgb',
                                        shuffle=True,
                                        class_mode='binary',
                                        target_size=(IMAGE_SIZE, IMAGE_SIZE))

valid_gen = datagen.flow_from_dataframe(dataframe=DF_TRAIN,
                                        directory=TRAIN_PATH,
                                        x_col='image_file_path',
                                        y_col='pathology',
                                        subset='validation',
                                        batch_size=BATCH_SIZE,
                                        # seed=1,
                                        color_mode='rgb',
                                        shuffle=False,
                                        class_mode='binary',
                                        target_size=(IMAGE_SIZE, IMAGE_SIZE))

test_gen = datagen.flow_from_dataframe(dataframe=DF_TEST,
                                       directory=TEST_PATH,
                                       x_col='image_file_path',
                                       y_col='pathology',
                                       # subset='validation',
                                       batch_size=BATCH_SIZE,
                                       # seed=1,
                                       color_mode='rgb',
                                       shuffle=False,
                                       class_mode='binary',
                                       target_size=(IMAGE_SIZE, IMAGE_SIZE))
def mlp(x, hidden_units, dropout_rate):
    for units in hidden_units:
        x = layers.Dense(units, activation=tf.nn.gelu)(x)
        x = layers.Dropout(dropout_rate)(x)
    return x
class Patches(layers.Layer):
    def __init__(self, patch_size):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        # Extract non-overlapping patch_size x patch_size patches and
        # flatten each patch into a vector.
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'patch_size': self.patch_size,
        })
        return config
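# Quick shape check for Patches (illustrative numbers, not necessarily my config):
# Patches(16)(tf.zeros([2, 224, 224, 3])).shape -> (2, 196, 768),
# i.e. (224 // 16) ** 2 = 196 patches per image, each flattened to 16*16*3 = 768 values.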
class PatchEncoder(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def call(self, patch):
        # Linearly project each patch and add a learned position embedding.
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        # Store only the constructor arguments; the sublayers are rebuilt
        # from them, so they must not go into the config.
        config = super().get_config().copy()
        config.update({
            'num_patches': self.num_patches,
            'projection_dim': self.projection_dim,
        })
        return config
def create_vit_classifier():
    inputs = layers.Input(shape=input_shape)
    # Create patches.
    patches = Patches(patch_size)(inputs)
    # Encode patches.
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization 1.
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        # Create a multi-head attention layer.
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1)(x1, x1)
        # Skip connection 1.
        x2 = layers.Add()([attention_output, encoded_patches])
        # Layer normalization 2.
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        # MLP.
        x3 = mlp(x3, hidden_units=transformer_units, dropout_rate=0.1)
        # Skip connection 2.
        encoded_patches = layers.Add()([x3, x2])

    # Create a [batch_size, projection_dim] tensor.
    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    # Add MLP.
    features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    # Classify outputs.
    logits = layers.Dense(num_classes, activation="sigmoid")(features)
    # Create the Keras model.
    model = tf.keras.Model(inputs=inputs, outputs=logits)
    return model
def run_experiment(model):
    optimizer = tfa.optimizers.AdamW(learning_rate=learning_rate,
                                     weight_decay=weight_decay)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=['accuracy'])

    STEP_SIZE_TRAIN = train_gen.n // train_gen.batch_size
    STEP_SIZE_VALID = valid_gen.n // valid_gen.batch_size
    print(STEP_SIZE_TRAIN, STEP_SIZE_VALID)

    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy',
                                                     factor=0.2,
                                                     patience=2,
                                                     verbose=1,
                                                     min_delta=1e-4,
                                                     min_lr=1e-6,
                                                     mode='max')
    checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath='./model_3.hdf5',
                                                      monitor='val_accuracy',
                                                      verbose=1,
                                                      save_best_only=True,
                                                      save_weights_only=True,
                                                      mode='max')
    callbacks = [reduce_lr, checkpointer]

    history = model.fit(x=train_gen,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=valid_gen,
                        validation_steps=STEP_SIZE_VALID,
                        epochs=EPOCHS,
                        callbacks=callbacks,
                        verbose=1)
    model.save(f'{save_path}/model_3.h5')
    return history
vit_classifier = create_vit_classifier()
history = run_experiment(vit_classifier)
vit_classifier.load_weights(f'{save_path}/model_3.h5')

A = vit_classifier.predict(test_gen, steps=test_gen.n // test_gen.batch_size + 1)
predicted_classes = np.where(A > 0.5, 1, 0).ravel()  # flatten (n, 1) -> (n,)
true_classes = test_gen.classes
class_labels = list(test_gen.class_indices.keys())

results = pd.DataFrame(list(zip(test_gen.filenames, true_classes, predicted_classes)),
                       columns=['Image name', 'True class', 'Predicted class'])
results = results.replace({"True class": classes_dict})
results = results.replace({"Predicted class": classes_dict})

prob = pd.DataFrame(A, columns=['Predicted probability'])
result_df = pd.concat([results, prob], axis=1)
result_df['Predicted probability'] = pd.Series(
    ["{0:.1f}".format(val * 100) for val in result_df['Predicted probability']],
    index=result_df.index)

results_csv = f'{save_path}/results_3.csv'
with open(results_csv, mode='w') as f:
    result_df.to_csv(f)
Confusion matrix:
[[428 0]
[276 0]]
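For reference, the matrix and per-class metrics can be reproduced from the arrays in the listing above (a sketch; assumes scikit-learn is available):

from sklearn.metrics import confusion_matrix, classification_report

# true_classes, predicted_classes, and class_labels come from the listing above.
print(confusion_matrix(true_classes, predicted_classes))
print(classification_report(true_classes, predicted_classes,
                            target_names=class_labels))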
Please help me figure out how to rectify this problem.
Tags: python, keras, deep-learning, classification, transformer-model