From f1fd74c7eb485e4ea0cfb53f233404124c5665c6 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 12 Jun 2024 13:26:27 +0200
Subject: [PATCH] transformer patch size is dynamic now.

---
 config_params.json | 28 +++++++++++++++-------------
 models.py          | 47 ++++++++++++++++++++++++++++++++++++++---------
 train.py           | 30 ++++++++++++++++++++++--------
 3 files changed, 75 insertions(+), 30 deletions(-)

diff --git a/config_params.json b/config_params.json
index 8a56de5..6b8b6ed 100644
--- a/config_params.json
+++ b/config_params.json
@@ -1,42 +1,44 @@
 {
-    "backbone_type" : "nontransformer",
-    "task": "classification",
+    "backbone_type" : "transformer",
+    "task": "binarization",
     "n_classes" : 2,
-    "n_epochs" : 20,
-    "input_height" : 448,
-    "input_width" : 448,
+    "n_epochs" : 1,
+    "input_height" : 224,
+    "input_width" : 672,
     "weight_decay" : 1e-6,
-    "n_batch" : 6,
+    "n_batch" : 1,
     "learning_rate": 1e-4,
-    "f1_threshold_classification": 0.8,
     "patches" : true,
     "pretraining" : true,
     "augmentation" : false,
     "flip_aug" : false,
     "blur_aug" : false,
     "scaling" : true,
+    "degrading": false,
+    "brightening": false,
     "binarization" : false,
     "scaling_bluring" : false,
     "scaling_binarization" : false,
     "scaling_flip" : false,
     "rotation": false,
     "rotation_not_90": false,
-    "transformer_num_patches_xy": [28, 28],
-    "transformer_patchsize": 1,
+    "transformer_num_patches_xy": [7, 7],
+    "transformer_patchsize_x": 3,
+    "transformer_patchsize_y": 1,
+    "transformer_projection_dim": 192,
     "blur_k" : ["blur","guass","median"],
     "scales" : [0.6, 0.7, 0.8, 0.9, 1.1, 1.2, 1.4],
     "brightness" : [1.3, 1.5, 1.7, 2],
     "degrade_scales" : [0.2, 0.4],
     "flip_index" : [0, 1, -1],
     "thetha" : [10, -10],
-    "classification_classes_name" : {"0":"apple", "1":"orange"},
     "continue_training": false,
     "index_start" : 0,
     "dir_of_start_model" : " ",
     "weighted_loss": false,
     "is_loss_soft_dice": false,
     "data_is_provided": false,
-    "dir_train": "./train",
-    "dir_eval": "./eval",
-    "dir_output": "./output"
+    "dir_train": "/home/vahid/Documents/test/training_data_sample_binarization",
+    "dir_eval": "/home/vahid/Documents/test/eval",
+    "dir_output": "/home/vahid/Documents/test/out"
 }
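The sample configuration above has to satisfy the geometry that the reworked train.py enforces further down: each input dimension must equal the number of patches along that axis times the patch size along that axis times 32 (the downsampling factor of the ResNet50 encoder), and the projection dimension must be divisible by the patch area. A quick arithmetic check with the values from this config (the variable names here are illustrative only):

    # Values copied from the sample config above.
    input_height, input_width = 224, 672
    num_patches_x, num_patches_y = 7, 7     # transformer_num_patches_xy
    patchsize_x, patchsize_y = 3, 1         # transformer_patchsize_x / _y
    projection_dim = 192                    # transformer_projection_dim

    # The constraints checked in train.py below.
    assert input_height == num_patches_y * patchsize_y * 32    # 7 * 1 * 32 = 224
    assert input_width == num_patches_x * patchsize_x * 32     # 7 * 3 * 32 = 672
    assert projection_dim % (patchsize_x * patchsize_y) == 0   # 192 % 3 == 0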
diff --git a/models.py b/models.py
index b8b0d27..1abf304 100644
--- a/models.py
+++ b/models.py
@@ -6,25 +6,49 @@ from tensorflow.keras import layers
 from tensorflow.keras.regularizers import l2
 
 mlp_head_units = [2048, 1024]
-projection_dim = 64
+#projection_dim = 64
 transformer_layers = 8
 num_heads = 4
 resnet50_Weights_path = './pretrained_model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
 IMAGE_ORDERING = 'channels_last'
 MERGE_AXIS = -1
-transformer_units = [
-    projection_dim * 2,
-    projection_dim,
-] # Size of the transformer layers
 
 def mlp(x, hidden_units, dropout_rate):
     for units in hidden_units:
         x = layers.Dense(units, activation=tf.nn.gelu)(x)
         x = layers.Dropout(dropout_rate)(x)
     return x
 
-
 class Patches(layers.Layer):
+    def __init__(self, patch_size_x, patch_size_y):
+        super(Patches, self).__init__()
+        self.patch_size_x = patch_size_x
+        self.patch_size_y = patch_size_y
+
+    def call(self, images):
+        #print(tf.shape(images)[1],'images')
+        #print(self.patch_size,'self.patch_size')
+        batch_size = tf.shape(images)[0]
+        patches = tf.image.extract_patches(
+            images=images,
+            sizes=[1, self.patch_size_y, self.patch_size_x, 1],
+            strides=[1, self.patch_size_y, self.patch_size_x, 1],
+            rates=[1, 1, 1, 1],
+            padding="VALID",
+        )
+        patch_dims = patches.shape[-1]
+        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
+        return patches
+    def get_config(self):
+
+        config = super().get_config().copy()
+        config.update({
+            'patch_size_x': self.patch_size_x,
+            'patch_size_y': self.patch_size_y,
+        })
+        return config
+
+class Patches_old(layers.Layer):
     def __init__(self, patch_size):#__init__(self, **kwargs):#:__init__(self, patch_size):#__init__(self, **kwargs):
         super(Patches, self).__init__()
         self.patch_size = patch_size
@@ -369,8 +393,13 @@ def resnet50_unet(n_classes, input_height=224, input_width=224, task="segmentati
 
     return model
 
-def vit_resnet50_unet(n_classes, patch_size, num_patches, input_height=224, input_width=224, task="segmentation", weight_decay=1e-6, pretraining=False):
+def vit_resnet50_unet(n_classes, patch_size_x, patch_size_y, num_patches, projection_dim=64, input_height=224, input_width=224, task="segmentation", weight_decay=1e-6, pretraining=False):
     inputs = layers.Input(shape=(input_height, input_width, 3))
+
+    transformer_units = [
+        projection_dim * 2,
+        projection_dim,
+    ] # Size of the transformer layers
 
     IMAGE_ORDERING = 'channels_last'
     bn_axis=3
@@ -414,7 +443,7 @@ def vit_resnet50_unet(n_classes, patch_size, num_patches, input_height=224, inpu
     #patch_size_y = input_height / x.shape[1]
     #patch_size_x = input_width / x.shape[2]
     #patch_size = patch_size_x * patch_size_y
-    patches = Patches(patch_size)(x)
+    patches = Patches(patch_size_x, patch_size_y)(x)
     # Encode patches.
     encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)
 
@@ -434,7 +463,7 @@ def vit_resnet50_unet(n_classes, patch_size, num_patches, input_height=224, inpu
         # Skip connection 2.
         encoded_patches = layers.Add()([x3, x2])
 
-    encoded_patches = tf.reshape(encoded_patches, [-1, x.shape[1], x.shape[2], 64])
+    encoded_patches = tf.reshape(encoded_patches, [-1, x.shape[1], x.shape[2], int(projection_dim / (patch_size_x * patch_size_y))])
 
     v1024_2048 = Conv2D( 1024 , (1, 1), padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay))(encoded_patches)
     v1024_2048 = (BatchNormalization(axis=bn_axis))(v1024_2048)
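The rewritten Patches layer tiles its input with rectangular patch_size_y by patch_size_x windows via tf.image.extract_patches, replacing the old fixed square patch size. A rough usage sketch, assuming models.py is importable; the 7x21 feature map corresponds to the 224x672 sample config divided by the encoder stride of 32, and the 512 channels are purely illustrative:

    import tensorflow as tf
    from models import Patches  # the layer defined in this patch

    feature_map = tf.random.uniform((1, 7, 21, 512))  # (batch, h, w, channels)
    patches = Patches(patch_size_x=3, patch_size_y=1)(feature_map)
    print(patches.shape)  # (1, 49, 1536): 7*7 patches of 1*3*512 values each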
diff --git a/train.py b/train.py
index 9e06a66..bafcc9e 100644
--- a/train.py
+++ b/train.py
@@ -70,8 +70,10 @@ def config_params():
     brightness = None # Brighten image for augmentation.
     flip_index = None # Flip image for augmentation.
     continue_training = False # Set to true if you would like to continue training an already trained a model.
-    transformer_patchsize = None # Patch size of vision transformer patches.
+    transformer_patchsize_x = None # Patch size of vision transformer patches in x direction.
+    transformer_patchsize_y = None # Patch size of vision transformer patches in y direction.
     transformer_num_patches_xy = None # Number of patches for vision transformer.
+    transformer_projection_dim = 64 # Transformer projection dimension.
     index_start = 0 # Index of model to continue training from. E.g. if you trained for 3 epochs and last index is 2, to continue from model_1.h5, set "index_start" to 3 to start naming model with index 3.
     dir_of_start_model = '' # Directory containing pretrained encoder to continue training the model.
     is_loss_soft_dice = False # Use soft dice as loss function. When set to true, "weighted_loss" must be false.
@@ -92,7 +94,7 @@ def run(_config, n_classes, n_epochs, input_height,
         brightening, binarization, blur_k, scales, degrade_scales,
         brightness, dir_train, data_is_provided, scaling_bluring,
         scaling_brightness, scaling_binarization, rotation, rotation_not_90,
-        thetha, scaling_flip, continue_training, transformer_patchsize,
+        thetha, scaling_flip, continue_training, transformer_projection_dim, transformer_patchsize_x, transformer_patchsize_y,
         transformer_num_patches_xy, backbone_type, flip_index, dir_eval, dir_output,
         pretraining, learning_rate, task, f1_threshold_classification, classification_classes_name):
@@ -212,15 +214,27 @@ def run(_config, n_classes, n_epochs, input_height,
         if backbone_type=='nontransformer':
             model = resnet50_unet(n_classes, input_height, input_width, task, weight_decay, pretraining)
         elif backbone_type=='transformer':
-            num_patches = transformer_num_patches_xy[0]*transformer_num_patches_xy[1]
+            num_patches_x = transformer_num_patches_xy[0]
+            num_patches_y = transformer_num_patches_xy[1]
+            num_patches = num_patches_x * num_patches_y
 
-            if not (num_patches == (input_width / 32) * (input_height / 32)):
-                print("Error: transformer num patches error. Parameter transformer_num_patches_xy should be set to (input_width/32) = {} and (input_height/32) = {}".format(int(input_width / 32), int(input_height / 32)) )
+            ##if not (num_patches == (input_width / 32) * (input_height / 32)):
+                ##print("Error: transformer num patches error. Parameter transformer_num_patches_xy should be set to (input_width/32) = {} and (input_height/32) = {}".format(int(input_width / 32), int(input_height / 32)) )
+                ##sys.exit(1)
+            #if not (transformer_patchsize == 1):
+                #print("Error: transformer patchsize error. Parameter transformer_patchsizeshould set to 1" )
+                #sys.exit(1)
+            if input_height != (num_patches_y * transformer_patchsize_y * 32):
+                print("Error: transformer_patchsize_y or transformer_num_patches_xy height value error. input_height should be equal to (transformer_num_patches_xy height value * transformer_patchsize_y * 32).")
                 sys.exit(1)
-            if not (transformer_patchsize == 1):
-                print("Error: transformer patchsize error. Parameter transformer_patchsizeshould set to 1" )
+            if input_width != (num_patches_x * transformer_patchsize_x * 32):
+                print("Error: transformer_patchsize_x or transformer_num_patches_xy width value error. input_width should be equal to (transformer_num_patches_xy width value * transformer_patchsize_x * 32).")
                 sys.exit(1)
-            model = vit_resnet50_unet(n_classes, transformer_patchsize, num_patches, input_height, input_width, task, weight_decay, pretraining)
+            if (transformer_projection_dim % (transformer_patchsize_y * transformer_patchsize_x)) != 0:
+                print("Error: transformer_projection_dim error. transformer_projection_dim must be divisible by (transformer_patchsize_y * transformer_patchsize_x).")
+                sys.exit(1)
+
+            model = vit_resnet50_unet(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining)
 
         #if you want to see the model structure just uncomment model summary.
         #model.summary()
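With these checks passing, the model is built with per-axis patch sizes and a configurable projection dimension. A minimal sketch of the new call, using the sample config values and the keyword names from the signature added in models.py (pretraining=False here so the local ResNet50 weights file is not needed):

    from models import vit_resnet50_unet

    model = vit_resnet50_unet(
        n_classes=2,
        patch_size_x=3, patch_size_y=1,
        num_patches=7 * 7,
        projection_dim=192,
        input_height=224, input_width=672,
        task="binarization",
        weight_decay=1e-6,
        pretraining=False,
    )
    model.summary()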