adding foreground rgb to augmentation

2026-02-26 03:12:24 +01:00 · 2024-08-28 02:09:27 +02:00 · 2024-08-28 02:09:27 +02:00 · c502e67c14
commit c502e67c14
parent 4f0e3efa2b
3 changed files with 57 additions and 12 deletions
--- a/config_params.json
+++ b/config_params.json
@ -13,13 +13,14 @@
    "augmentation" : true,
    "flip_aug" : false,
    "blur_aug" : false,
-    "scaling" : true,
+    "scaling" : false,
    "adding_rgb_background": true,
-    "add_red_textlines": true,
-    "channels_shuffling": true,
+    "adding_rgb_foreground": true,
+    "add_red_textlines": false,
+    "channels_shuffling": false,
    "degrading": false,
    "brightening": false,
-    "binarization" : false,
+    "binarization" : true,
    "scaling_bluring" : false,
    "scaling_binarization" : false,
    "scaling_flip" : false,
@ -51,6 +52,7 @@
    "dir_eval": "/home/vahid/Documents/test/sbb_pixelwise_segmentation/test_label/pageextractor_test/eval_new",
    "dir_output": "/home/vahid/Documents/test/sbb_pixelwise_segmentation/test_label/pageextractor_test/output_new",
    "dir_rgb_backgrounds": "/home/vahid/Documents/1_2_test_eynollah/set_rgb_background",
+    "dir_rgb_foregrounds": "/home/vahid/Documents/1_2_test_eynollah/out_set_rgb_foreground",
    "dir_img_bin": "/home/vahid/Documents/test/sbb_pixelwise_segmentation/test_label/pageextractor_test/train_new/images_bin"
    
 }
--- a/train.py
+++ b/train.py
@ -54,6 +54,7 @@ def config_params():
    brightening = False  # If true, brightening will be applied to the image. The amount of brightening is defined with "brightness" in config_params.json.
    binarization = False  # If true, Otsu thresholding will be applied to augment the input with binarized images.
    adding_rgb_background = False
+    adding_rgb_foreground = False
    add_red_textlines = False
    channels_shuffling = False
    dir_train = None  # Directory of training dataset with subdirectories having the names "images" and "labels".
@ -95,6 +96,7 @@ def config_params():
    dir_img_bin = None
    number_of_backgrounds_per_image = 1
    dir_rgb_backgrounds = None
+    dir_rgb_foregrounds = None


@ex.automain
@ -103,20 +105,25 @@ def run(_config, n_classes, n_epochs, input_height,
        index_start, dir_of_start_model, is_loss_soft_dice,
        n_batch, patches, augmentation, flip_aug,
        blur_aug, padding_white, padding_black, scaling, degrading,channels_shuffling,
-        brightening, binarization, adding_rgb_background, add_red_textlines, blur_k, scales, degrade_scales,shuffle_indexes,
+        brightening, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, blur_k, scales, degrade_scales,shuffle_indexes,
        brightness, dir_train, data_is_provided, scaling_bluring,
        scaling_brightness, scaling_binarization, rotation, rotation_not_90,
        thetha, scaling_flip, continue_training, transformer_projection_dim,
        transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_cnn_first,
        transformer_patchsize_x, transformer_patchsize_y,
        transformer_num_patches_xy, backbone_type, flip_index, dir_eval, dir_output,
-        pretraining, learning_rate, task, f1_threshold_classification, classification_classes_name, dir_img_bin, number_of_backgrounds_per_image,dir_rgb_backgrounds):
+        pretraining, learning_rate, task, f1_threshold_classification, classification_classes_name, dir_img_bin, number_of_backgrounds_per_image,dir_rgb_backgrounds, dir_rgb_foregrounds):
    
    if dir_rgb_backgrounds:
        list_all_possible_background_images = os.listdir(dir_rgb_backgrounds)
    else:
        list_all_possible_background_images = None
    
+    if dir_rgb_foregrounds:
+        list_all_possible_foreground_rgbs = os.listdir(dir_rgb_foregrounds)
+    else:
+        list_all_possible_foreground_rgbs = None
+        
    if task == "segmentation" or task == "enhancement" or task == "binarization":
        if data_is_provided:
            dir_train_flowing = os.path.join(dir_output, 'train')
@ -175,18 +182,18 @@ def run(_config, n_classes, n_epochs, input_height,
            # writing patches into a sub-folder in order to be flowed from directory.
            provide_patches(imgs_list, segs_list, dir_img, dir_seg, dir_flow_train_imgs,
                            dir_flow_train_labels, input_height, input_width, blur_k,
-                            blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background,add_red_textlines, channels_shuffling,
+                            blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background,adding_rgb_foreground, add_red_textlines, channels_shuffling,
                            scaling, degrading, brightening, scales, degrade_scales, brightness,
                            flip_index,shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization,
                            rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=augmentation,
-                            patches=patches, dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds)
+                            patches=patches, dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds, dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs)
            
            provide_patches(imgs_list_test, segs_list_test, dir_img_val, dir_seg_val,
                            dir_flow_eval_imgs, dir_flow_eval_labels, input_height, input_width,
-                            blur_k, blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background, add_red_textlines, channels_shuffling,
+                            blur_k, blur_aug, padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling,
                            scaling, degrading, brightening, scales, degrade_scales, brightness,
                            flip_index, shuffle_indexes, scaling_bluring, scaling_brightness, scaling_binarization,
-                            rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=patches,dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds)
+                            rotation, rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=patches,dir_img_bin=dir_img_bin,number_of_backgrounds_per_image=number_of_backgrounds_per_image,list_all_possible_background_images=list_all_possible_background_images, dir_rgb_backgrounds=dir_rgb_backgrounds,dir_rgb_foregrounds=dir_rgb_foregrounds,list_all_possible_foreground_rgbs=list_all_possible_foreground_rgbs )

        if weighted_loss:
            weights = np.zeros(n_classes)
--- a/utils.py
+++ b/utils.py
@ -40,6 +40,25 @@ def return_binary_image_with_given_rgb_background(img_bin, img_rgb_background):
    
    return img_final

+def return_binary_image_with_given_rgb_background_and_given_foreground_rgb(img_bin, img_rgb_background, rgb_foreground):
+    """Combine a binarized image with a background image and a foreground colour.
+
+    A foreground layer is filled with ``rgb_foreground`` wherever channel 0 of
+    ``img_bin`` equals 0. A copy of ``img_bin`` has each channel overwritten
+    with the resized background wherever that same channel of ``img_bin`` is
+    non-zero. The two layers are then added together.
+
+    Args:
+        img_bin: array indexed as ``[:, :, 0..2]``; used as the mask.
+        img_rgb_background: image passed to ``resize_image`` together with the
+            first two dimensions of ``img_bin``.
+        rgb_foreground: indexable value whose entries ``[0]``, ``[1]`` and
+            ``[2]`` are written to channels 0, 1 and 2 respectively.
+
+    Returns:
+        The element-wise sum of the background-filled copy of ``img_bin`` and
+        the foreground layer. The foreground layer comes from ``np.zeros``
+        without an explicit dtype, so the sum is a floating-point array and is
+        not cast back to the dtype of ``img_bin`` here.
+    """
+    # resize_image receives img_bin's first two dimensions, so the background
+    # can be indexed with masks derived from img_bin below.
+    # (presumably these are height and width -- confirm against resize_image)
+    img_rgb_background = resize_image(img_rgb_background ,img_bin.shape[0], img_bin.shape[1])
+    
+    img_final = np.copy(img_bin)
+    # Separate layer that holds only the foreground colour; zero elsewhere.
+    img_foreground = np.zeros(img_bin.shape)
+    
+    
+    # The foreground mask is taken from channel 0 of img_bin for all three channels.
+    # NOTE(review): the callers visible in this change load img_bin with
+    # cv2.imread; confirm rgb_foreground is stored in the same channel order.
+    img_foreground[:,:,0][img_bin[:,:,0] == 0] = rgb_foreground[0]
+    img_foreground[:,:,1][img_bin[:,:,0] == 0] = rgb_foreground[1]
+    img_foreground[:,:,2][img_bin[:,:,0] == 0] = rgb_foreground[2]
+    
+    
+    # The background mask is evaluated per channel (channel c of img_bin != 0).
+    # NOTE(review): this differs from the channel-0-only foreground mask above;
+    # the two agree only when all channels of img_bin are identical -- confirm
+    # that img_bin always satisfies this.
+    img_final[:,:,0][img_bin[:,:,0] != 0] = img_rgb_background[:,:,0][img_bin[:,:,0] != 0]
+    img_final[:,:,1][img_bin[:,:,1] != 0] = img_rgb_background[:,:,1][img_bin[:,:,1] != 0]
+    img_final[:,:,2][img_bin[:,:,2] != 0] = img_rgb_background[:,:,2][img_bin[:,:,2] != 0]
+    
+    # Where img_bin is 0 the copy still holds 0, so adding the foreground layer
+    # yields the foreground colour there and leaves background pixels unchanged.
+    img_final = img_final + img_foreground
+    return img_final
+
 def return_binary_image_with_given_rgb_background_red_textlines(img_bin, img_rgb_background, img_color):
    img_rgb_background = resize_image(img_rgb_background ,img_bin.shape[0], img_bin.shape[1])
    
@ -641,10 +660,10 @@ def get_patches_num_scale_new(dir_img_f, dir_seg_f, img, label, height, width, i

 def provide_patches(imgs_list_train, segs_list_train, dir_img, dir_seg, dir_flow_train_imgs,
                    dir_flow_train_labels, input_height, input_width, blur_k, blur_aug,
-                    padding_white, padding_black, flip_aug, binarization, adding_rgb_background, add_red_textlines, channels_shuffling, scaling, degrading,
+                    padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling, scaling, degrading,
                    brightening, scales, degrade_scales, brightness, flip_index, shuffle_indexes,
                    scaling_bluring, scaling_brightness, scaling_binarization, rotation,
-                    rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=False, dir_img_bin=None,number_of_backgrounds_per_image=None,list_all_possible_background_images=None, dir_rgb_backgrounds=None):
+                    rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=False, dir_img_bin=None,number_of_backgrounds_per_image=None,list_all_possible_background_images=None, dir_rgb_backgrounds=None, dir_rgb_foregrounds=None, list_all_possible_foreground_rgbs=None):
    
    indexer = 0
    for im, seg_i in tqdm(zip(imgs_list_train, segs_list_train)):
@ -754,6 +773,23 @@ def provide_patches(imgs_list_train, segs_list_train, dir_img, dir_seg, dir_flow
                        
                        indexer += 1
                        
+                if adding_rgb_foreground:
+                    img_bin_corr = cv2.imread(dir_img_bin + '/' + img_name+'.png')
+                    for i_n in range(number_of_backgrounds_per_image):
+                        background_image_chosen_name = random.choice(list_all_possible_background_images)
+                        foreground_rgb_chosen_name = random.choice(list_all_possible_foreground_rgbs)
+                        
+                        img_rgb_background_chosen = cv2.imread(dir_rgb_backgrounds + '/' + background_image_chosen_name)
+                        foreground_rgb_chosen = np.load(dir_rgb_foregrounds + '/' + foreground_rgb_chosen_name)
+                        
+                        img_with_overlayed_background = return_binary_image_with_given_rgb_background_and_given_foreground_rgb(img_bin_corr, img_rgb_background_chosen, foreground_rgb_chosen)
+                        
+                        cv2.imwrite(dir_flow_train_imgs + '/img_' + str(indexer) + '.png', resize_image(img_with_overlayed_background, input_height, input_width))
+                        cv2.imwrite(dir_flow_train_labels + '/img_' + str(indexer) + '.png',
+                                    resize_image(cv2.imread(dir_of_label_file), input_height, input_width))
+                        
+                        indexer += 1
+                        
                if add_red_textlines:
                    img_bin_corr = cv2.imread(dir_img_bin + '/' + img_name+'.png')
                    img_red_context = return_image_with_red_elements(cv2.imread(dir_img + '/'+im), img_bin_corr)