From c502e67c14b073812f2dd660fa9db6b1bd81e5c1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 28 Aug 2024 02:09:27 +0200 Subject: [PATCH] adding foreground rgb to augmentation --- config_params.json | 10 ++++++---- train.py | 19 +++++++++++++------ utils.py | 40 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 12 deletions(-) diff --git a/config_params.json b/config_params.json index e5f652d..1db8026 100644 --- a/config_params.json +++ b/config_params.json @@ -13,13 +13,14 @@ "augmentation" : true, "flip_aug" : false, "blur_aug" : false, - "scaling" : true, + "scaling" : false, "adding_rgb_background": true, - "add_red_textlines": true, - "channels_shuffling": true, + "adding_rgb_foreground": true, + "add_red_textlines": false, + "channels_shuffling": false, "degrading": false, "brightening": false, - "binarization" : false, + "binarization" : true, "scaling_bluring" : false, "scaling_binarization" : false, "scaling_flip" : false, @@ -51,6 +52,7 @@ "dir_eval": "/home/vahid/Documents/test/sbb_pixelwise_segmentation/test_label/pageextractor_test/eval_new", "dir_output": "/home/vahid/Documents/test/sbb_pixelwise_segmentation/test_label/pageextractor_test/output_new", "dir_rgb_backgrounds": "/home/vahid/Documents/1_2_test_eynollah/set_rgb_background", + "dir_rgb_foregrounds": "/home/vahid/Documents/1_2_test_eynollah/out_set_rgb_foreground", "dir_img_bin": "/home/vahid/Documents/test/sbb_pixelwise_segmentation/test_label/pageextractor_test/train_new/images_bin" } diff --git a/train.py b/train.py index 5dfad07..848ff6a 100644 --- a/train.py +++ b/train.py @@ -54,6 +54,7 @@ def config_params(): brightening = False # If true, brightening will be applied to the image. The amount of brightening is defined with "brightness" in config_params.json. binarization = False # If true, Otsu thresholding will be applied to augment the input with binarized images. 
adding_rgb_background = False + adding_rgb_foreground = False add_red_textlines = False channels_shuffling = False dir_train = None # Directory of training dataset with subdirectories having the names "images" and "labels". @@ -95,6 +96,7 @@ def config_params(): dir_img_bin = None number_of_backgrounds_per_image = 1 dir_rgb_backgrounds = None + dir_rgb_foregrounds = None @ex.automain @@ -103,20 +105,25 @@ def run(_config, n_classes, n_epochs, input_height, index_start, dir_of_start_model, is_loss_soft_dice, n_batch, patches, augmentation, flip_aug, blur_aug, padding_white, padding_black, scaling, degrading,channels_shuffling, - brightening, binarization, adding_rgb_background, add_red_textlines, blur_k, scales, degrade_scales,shuffle_indexes, + brightening, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, blur_k, scales, degrade_scales,shuffle_indexes, brightness, dir_train, data_is_provided, scaling_bluring, scaling_brightness, scaling_binarization, rotation, rotation_not_90, thetha, scaling_flip, continue_training, transformer_projection_dim, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_cnn_first, transformer_patchsize_x, transformer_patchsize_y, transformer_num_patches_xy, backbone_type, flip_index, dir_eval, dir_output, - pretraining, learning_rate, task, f1_threshold_classification, classification_classes_name, dir_img_bin, number_of_backgrounds_per_image,dir_rgb_backgrounds): + pretraining, learning_rate, task, f1_threshold_classification, classification_classes_name, dir_img_bin, number_of_backgrounds_per_image,dir_rgb_backgrounds, dir_rgb_foregrounds): if dir_rgb_backgrounds: list_all_possible_background_images = os.listdir(dir_rgb_backgrounds) else: list_all_possible_background_images = None + if dir_rgb_foregrounds: + list_all_possible_foreground_rgbs = os.listdir(dir_rgb_foregrounds) + else: + list_all_possible_foreground_rgbs = None + if task == "segmentation" or task == "enhancement" 
or task == "binarization": if data_is_provided: dir_train_flowing = os.path.join(dir_output, 'train') @@ -175,18 +182,18 @@ def run(_config, n_classes, n_epochs, input_height, # writing patches into a sub-folder in order to be flowed from directory. provide_patches(imgs_list, segs_list, dir_img, dir_seg, dir_flow_train_imgs, dir_flow_train_labels, input_height, input_width, blur_k, - blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background,add_red_textlines, channels_shuffling, + blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background,adding_rgb_foreground, add_red_textlines, channels_shuffling, scaling, degrading, brightening, scales, degrade_scales, brightness, flip_index,shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization, rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=augmentation, - patches=patches, dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds) + patches=patches, dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds, dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs) provide_patches(imgs_list_test, segs_list_test, dir_img_val, dir_seg_val, dir_flow_eval_imgs, dir_flow_eval_labels, input_height, input_width, - blur_k, blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background, add_red_textlines, channels_shuffling, + blur_k, blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling, scaling, degrading, brightening, scales, degrade_scales, brightness, flip_index, shuffle_indexes, 
def return_binary_image_with_given_rgb_background_and_given_foreground_rgb(img_bin, img_rgb_background, rgb_foreground):
    """Paint a binarized image with one foreground colour over a background image.

    Pixels whose first channel in ``img_bin`` equals 0 are treated as
    foreground and receive ``rgb_foreground``; every other pixel is taken
    from ``img_rgb_background``.

    Args:
        img_bin: 3-channel array of shape (height, width, channels). The
            first channel alone decides which pixels are foreground.
        img_rgb_background: 3-channel array used for the non-foreground
            pixels. It is resized to the height/width of ``img_bin`` when
            the sizes differ.
        rgb_foreground: sequence of three values; value ``i`` is written to
            channel ``i`` of every foreground pixel.

    Returns:
        A new (height, width, 3) array with the dtype of ``img_bin``. The
        inputs are not modified.
    """
    height, width = img_bin.shape[:2]

    # NOTE(review): assumes resize_image only resamples to the requested
    # height/width, so it can be skipped when the sizes already agree.
    if tuple(img_rgb_background.shape[:2]) != (height, width):
        img_rgb_background = resize_image(img_rgb_background, height, width)

    # One mask for all channels. Previously the foreground used channel 0
    # while the background used a per-channel mask, so a pixel whose channels
    # disagreed got background and foreground values summed together.
    text_mask = img_bin[:, :, 0] == 0
    foreground = np.asarray(rgb_foreground)[:3]

    img_final = np.where(text_mask[:, :, np.newaxis],
                         foreground,
                         img_rgb_background[:, :, :3])

    # Keep the dtype of the input image instead of silently promoting to
    # float64; round and saturate so integer images cannot wrap around.
    if np.issubdtype(img_bin.dtype, np.integer):
        limits = np.iinfo(img_bin.dtype)
        img_final = np.clip(np.rint(img_final), limits.min, limits.max)
    return img_final.astype(img_bin.dtype)
,img_bin.shape[0], img_bin.shape[1]) @@ -641,10 +660,10 @@ def get_patches_num_scale_new(dir_img_f, dir_seg_f, img, label, height, width, i def provide_patches(imgs_list_train, segs_list_train, dir_img, dir_seg, dir_flow_train_imgs, dir_flow_train_labels, input_height, input_width, blur_k, blur_aug, - padding_white, padding_black, flip_aug, binarization, adding_rgb_background, add_red_textlines, channels_shuffling, scaling, degrading, + padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling, scaling, degrading, brightening, scales, degrade_scales, brightness, flip_index, shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization, rotation, - rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=False, dir_img_bin=None,number_of_backgrounds_per_image=None,list_all_possible_background_images=None, dir_rgb_backgrounds=None): + rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=False, dir_img_bin=None,number_of_backgrounds_per_image=None,list_all_possible_background_images=None, dir_rgb_backgrounds=None, dir_rgb_foregrounds=None, list_all_possible_foreground_rgbs=None): indexer = 0 for im, seg_i in tqdm(zip(imgs_list_train, segs_list_train)): @@ -754,6 +773,23 @@ def provide_patches(imgs_list_train, segs_list_train, dir_img, dir_seg, dir_flow indexer += 1 + if adding_rgb_foreground: + img_bin_corr = cv2.imread(dir_img_bin + '/' + img_name+'.png') + for i_n in range(number_of_backgrounds_per_image): + background_image_chosen_name = random.choice(list_all_possible_background_images) + foreground_rgb_chosen_name = random.choice(list_all_possible_foreground_rgbs) + + img_rgb_background_chosen = cv2.imread(dir_rgb_backgrounds + '/' + background_image_chosen_name) + foreground_rgb_chosen = np.load(dir_rgb_foregrounds + '/' + foreground_rgb_chosen_name) + + img_with_overlayed_background = 
return_binary_image_with_given_rgb_background_and_given_foreground_rgb(img_bin_corr, img_rgb_background_chosen, foreground_rgb_chosen) + + cv2.imwrite(dir_flow_train_imgs + '/img_' + str(indexer) + '.png', resize_image(img_with_overlayed_background, input_height, input_width)) + cv2.imwrite(dir_flow_train_labels + '/img_' + str(indexer) + '.png', + resize_image(cv2.imread(dir_of_label_file), input_height, input_width)) + + indexer += 1 + if add_red_textlines: img_bin_corr = cv2.imread(dir_img_bin + '/' + img_name+'.png') img_red_context = return_image_with_red_elements(cv2.imread(dir_img + '/'+im), img_bin_corr)