diff --git a/src/eynollah/training/metrics.py b/src/eynollah/training/metrics.py index cd30b02..a8f47d7 100644 --- a/src/eynollah/training/metrics.py +++ b/src/eynollah/training/metrics.py @@ -147,6 +147,7 @@ def generalized_dice_loss(y_true, y_pred): return 1 - generalized_dice_coeff2(y_true, y_pred) +# TODO: document where this is from def soft_dice_loss(y_true, y_pred, epsilon=1e-6): """ Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions. @@ -175,6 +176,7 @@ def soft_dice_loss(y_true, y_pred, epsilon=1e-6): return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch +# TODO: document where this is from def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last=True, mean_per_class=False, verbose=False): """ @@ -267,6 +269,8 @@ def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last=T return K.mean(non_zero_sum / non_zero_count) +# TODO: document where this is from +# TODO: Why a different implementation than IoU from utils? def mean_iou(y_true, y_pred, **kwargs): """ Compute mean Intersection over Union of two segmentation masks, via Keras. @@ -311,6 +315,7 @@ def iou_vahid(y_true, y_pred): return K.mean(iou) +# TODO: copy from utils? def IoU_metric(Yi, y_predi): # mean Intersection over Union # Mean IoU = TP/(FN + TP + FP) @@ -337,6 +342,7 @@ def IoU_metric_keras(y_true, y_pred): return IoU_metric(y_true.eval(session=sess), y_pred.eval(session=sess)) +# TODO: unused, remove? def jaccard_distance_loss(y_true, y_pred, smooth=100): """ Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) diff --git a/src/eynollah/training/models.py b/src/eynollah/training/models.py index d1b0aa2..9030096 100644 --- a/src/eynollah/training/models.py +++ b/src/eynollah/training/models.py @@ -5,6 +5,8 @@ from tensorflow.keras.layers import * from tensorflow.keras import layers from tensorflow.keras.regularizers import l2 +from eynollah.patch_encoder import Patches, PatchEncoder + ##mlp_head_units = [512, 256]#[2048, 1024] ###projection_dim = 64 ##transformer_layers = 2#8 @@ -38,87 +40,6 @@ def mlp(x, hidden_units, dropout_rate): x = layers.Dropout(dropout_rate)(x) return x -class Patches(layers.Layer): - def __init__(self, patch_size_x, patch_size_y):#__init__(self, **kwargs):#:__init__(self, patch_size):#__init__(self, **kwargs): - super(Patches, self).__init__() - self.patch_size_x = patch_size_x - self.patch_size_y = patch_size_y - - def call(self, images): - #print(tf.shape(images)[1],'images') - #print(self.patch_size,'self.patch_size') - batch_size = tf.shape(images)[0] - patches = tf.image.extract_patches( - images=images, - sizes=[1, self.patch_size_y, self.patch_size_x, 1], - strides=[1, self.patch_size_y, self.patch_size_x, 1], - rates=[1, 1, 1, 1], - padding="VALID", - ) - #patch_dims = patches.shape[-1] - patch_dims = tf.shape(patches)[-1] - patches = tf.reshape(patches, [batch_size, -1, patch_dims]) - return patches - def get_config(self): - - config = super().get_config().copy() - config.update({ - 'patch_size_x': self.patch_size_x, - 'patch_size_y': self.patch_size_y, - }) - return config - -class Patches_old(layers.Layer): - def __init__(self, patch_size):#__init__(self, **kwargs):#:__init__(self, patch_size):#__init__(self, **kwargs): - super(Patches, self).__init__() - self.patch_size = patch_size - - def call(self, images): - #print(tf.shape(images)[1],'images') - #print(self.patch_size,'self.patch_size') - batch_size = tf.shape(images)[0] - patches = 
tf.image.extract_patches( - images=images, - sizes=[1, self.patch_size, self.patch_size, 1], - strides=[1, self.patch_size, self.patch_size, 1], - rates=[1, 1, 1, 1], - padding="VALID", - ) - patch_dims = patches.shape[-1] - #print(patches.shape,patch_dims,'patch_dims') - patches = tf.reshape(patches, [batch_size, -1, patch_dims]) - return patches - def get_config(self): - - config = super().get_config().copy() - config.update({ - 'patch_size': self.patch_size, - }) - return config - - -class PatchEncoder(layers.Layer): - def __init__(self, num_patches, projection_dim): - super(PatchEncoder, self).__init__() - self.num_patches = num_patches - self.projection = layers.Dense(units=projection_dim) - self.position_embedding = layers.Embedding( - input_dim=num_patches, output_dim=projection_dim - ) - - def call(self, patch): - positions = tf.range(start=0, limit=self.num_patches, delta=1) - encoded = self.projection(patch) + self.position_embedding(positions) - return encoded - def get_config(self): - - config = super().get_config().copy() - config.update({ - 'num_patches': self.num_patches, - 'projection': self.projection, - 'position_embedding': self.position_embedding, - }) - return config def one_side_pad(x): diff --git a/src/eynollah/training/train.py b/src/eynollah/training/train.py index c15a562..7a0cb3d 100644 --- a/src/eynollah/training/train.py +++ b/src/eynollah/training/train.py @@ -175,22 +175,94 @@ def config_params(): characters_txt_file = None # Directory of characters text file needed for cnn_rnn_ocr model training. The file ends with .txt @ex.automain -def run(_config, n_classes, n_epochs, input_height, - input_width, weight_decay, weighted_loss, - index_start, dir_of_start_model, is_loss_soft_dice, - n_batch, patches, augmentation, flip_aug, - blur_aug, padding_white, padding_black, scaling, shifting, degrading,channels_shuffling, - brightening, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, blur_k, scales, degrade_scales,shuffle_indexes, - brightness, dir_train, data_is_provided, scaling_bluring, - scaling_brightness, scaling_binarization, rotation, rotation_not_90, - thetha, thetha_padd, scaling_flip, continue_training, transformer_projection_dim, - transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_cnn_first, - transformer_patchsize_x, transformer_patchsize_y, - transformer_num_patches_xy, backbone_type, save_interval, flip_index, dir_eval, dir_output, - pretraining, learning_rate, task, f1_threshold_classification, classification_classes_name, dir_img_bin, number_of_backgrounds_per_image,dir_rgb_backgrounds, - dir_rgb_foregrounds, characters_txt_file, color_padding_rotation, bin_deg, image_inversion, white_noise_strap, textline_skewing, textline_skewing_bin, - textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, - textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, padd_colors, pepper_indexes, white_padds, skewing_amplitudes, max_len): +def run( + _config, + n_classes, + n_epochs, + input_height, + input_width, + weight_decay, + weighted_loss, + index_start, + dir_of_start_model, + is_loss_soft_dice, + n_batch, + patches, + augmentation, + flip_aug, + blur_aug, + padding_white, + padding_black, + scaling, + shifting, + degrading, + channels_shuffling, + brightening, + binarization, + adding_rgb_background, + adding_rgb_foreground, + add_red_textlines, + blur_k, + scales, + 
degrade_scales, + shuffle_indexes, + brightness, + dir_train, + data_is_provided, + scaling_bluring, + scaling_brightness, + scaling_binarization, + rotation, + rotation_not_90, + thetha, + thetha_padd, + scaling_flip, + continue_training, + transformer_projection_dim, + transformer_mlp_head_units, + transformer_layers, + transformer_num_heads, + transformer_cnn_first, + transformer_patchsize_x, + transformer_patchsize_y, + transformer_num_patches_xy, + backbone_type, + save_interval, + flip_index, + dir_eval, + dir_output, + pretraining, + learning_rate, + task, + f1_threshold_classification, + classification_classes_name, + dir_img_bin, + number_of_backgrounds_per_image, + dir_rgb_backgrounds, + dir_rgb_foregrounds, + characters_txt_file, + color_padding_rotation, + bin_deg, + image_inversion, + white_noise_strap, + textline_skewing, + textline_skewing_bin, + textline_left_in_depth, + textline_left_in_depth_bin, + textline_right_in_depth, + textline_right_in_depth_bin, + textline_up_in_depth, + textline_up_in_depth_bin, + textline_down_in_depth, + textline_down_in_depth_bin, + pepper_bin_aug, + pepper_aug, + padd_colors, + pepper_indexes, + white_padds, + skewing_amplitudes, + max_len, +): if dir_rgb_backgrounds: list_all_possible_background_images = os.listdir(dir_rgb_backgrounds) @@ -201,6 +273,10 @@ def run(_config, n_classes, n_epochs, input_height, list_all_possible_foreground_rgbs = os.listdir(dir_rgb_foregrounds) else: list_all_possible_foreground_rgbs = None + + dir_seg = None + weights = None + model = None if task == "segmentation" or task == "enhancement" or task == "binarization": if data_is_provided: @@ -285,6 +361,7 @@ def run(_config, n_classes, n_epochs, input_height, pass else: + assert dir_seg is not None for obj in os.listdir(dir_seg): try: label_obj = cv2.imread(dir_seg + '/' + obj) @@ -314,6 +391,8 @@ def run(_config, n_classes, n_epochs, input_height, model = load_model(dir_of_start_model, compile=True, custom_objects={'loss': weighted_categorical_crossentropy(weights)}) if not is_loss_soft_dice and not weighted_loss: model = load_model(dir_of_start_model , compile=True,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + else: + raise ValueError("backbone_type must be 'nontransformer' or 'transformer'") else: index_start = 0 if backbone_type=='nontransformer': @@ -348,6 +427,7 @@ def run(_config, n_classes, n_epochs, input_height, sys.exit(1) model = vit_resnet50_unet_transformer_before_cnn(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining) + assert model is not None #if you want to see the model structure just uncomment model summary. 
model.summary() @@ -377,9 +457,7 @@ def run(_config, n_classes, n_epochs, input_height, ##score_best=[] ##score_best.append(0) - if save_interval: - save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) - + save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) if save_interval else None for i in tqdm(range(index_start, n_epochs + index_start)): if save_interval: @@ -459,8 +537,7 @@ def run(_config, n_classes, n_epochs, input_height, opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)#1e-4)#(lr_schedule) model.compile(optimizer=opt) - if save_interval: - save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) + save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) if save_interval else None for i in tqdm(range(index_start, n_epochs + index_start)): if save_interval: @@ -559,8 +636,7 @@ def run(_config, n_classes, n_epochs, input_height, model.compile(loss="binary_crossentropy", optimizer = opt_adam,metrics=['accuracy']) - if save_interval: - save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) + save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) if save_interval else None for i in range(n_epochs): if save_interval: diff --git a/src/eynollah/training/utils.py b/src/eynollah/training/utils.py index c589957..0a92935 100644 --- a/src/eynollah/training/utils.py +++ b/src/eynollah/training/utils.py @@ -38,6 +38,7 @@ def scale_padd_image_for_ocr(img, height, width): img_fin[:,:width_new,:] = img_res[:,:,:] return img_fin +# TODO: document where this is from def add_salt_and_pepper_noise(img, salt_prob, pepper_prob): """ Add salt-and-pepper noise to an image. @@ -108,14 +109,17 @@ def do_padding_for_ocr(img, percent_height, padding_color): if padding_color == 'white': img_new = np.ones((height_new, width_new, img.shape[2])).astype(float) * 255 - if padding_color == 'black': + elif padding_color == 'black': img_new = np.zeros((height_new, width_new, img.shape[2])).astype(float) + else: + raise ValueError("padding_color must be 'white' or 'black'") img_new[h_start:h_start + img.shape[0], w_start:w_start + img.shape[1], :] = np.copy(img[:, :, :]) return img_new +# TODO: document where this is from def do_deskewing(img, amplitude): height, width = img.shape[:2] @@ -133,107 +137,79 @@ def do_deskewing(img, amplitude): curved_image = cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT) return curved_image -def do_left_in_depth(img): +# TODO: document where this is from +def do_direction_in_depth(img, direction: str): height, width = img.shape[:2] - # Define the original corner points of the image - src_points = np.float32([ - [0, 0], # Top-left corner - [width, 0], # Top-right corner - [0, height], # Bottom-left corner - [width, height] # Bottom-right corner - ]) + if direction == 'left': + # Define the original corner points of the image + src_points = np.float32([ + [0, 0], # Top-left corner + [width, 0], # Top-right corner + [0, height], # Bottom-left corner + [width, height] # Bottom-right corner + ]) - # Define the new corner points for a subtle right-to-left tilt - dst_points = np.float32([ - [2, 13], # Slight inward shift for top-left - [width, 0], # Slight downward shift for top-right - [2, height-13], # Slight inward shift for bottom-left - [width, height] # Slight upward shift for bottom-right - ]) + # Define the new corner points for a subtle right-to-left tilt + dst_points = 
np.float32([
+            [2, 13],              # Slight inward shift for top-left
+            [width, 0],           # Top-right unchanged
+            [2, height-13],       # Slight inward shift for bottom-left
+            [width, height]       # Bottom-right unchanged
+        ])
+    elif direction == 'right':
+        # Define the original corner points of the image
+        src_points = np.float32([
+            [0, 0],               # Top-left corner
+            [width, 0],           # Top-right corner
+            [0, height],          # Bottom-left corner
+            [width, height]       # Bottom-right corner
+        ])
 
-    # Compute the perspective transformation matrix
-    matrix = cv2.getPerspectiveTransform(src_points, dst_points)
+        # Define the new corner points for a subtle left-to-right tilt
+        dst_points = np.float32([
+            [0, 0],               # Top-left unchanged
+            [width, 13],          # Slight downward shift for top-right
+            [0, height],          # Bottom-left unchanged
+            [width, height - 13]  # Slight upward shift for bottom-right
+        ])
 
-    # Apply the perspective warp
-    warped_image = cv2.warpPerspective(img, matrix, (width, height))
-    return warped_image
+    elif direction == 'up':
+        # Define the original corner points of the image
+        src_points = np.float32([
+            [0, 0],               # Top-left corner
+            [width, 0],           # Top-right corner
+            [0, height],          # Bottom-left corner
+            [width, height]       # Bottom-right corner
+        ])
 
-def do_right_in_depth(img):
-    height, width = img.shape[:2]
+        # Define the new corner points to simulate a tilted perspective
+        # Make the top part appear closer and the bottom part farther
+        dst_points = np.float32([
+            [50, 0],              # Top-left moved inward
+            [width - 50, 0],      # Top-right moved inward
+            [0, height],          # Bottom-left remains the same
+            [width, height]       # Bottom-right remains the same
+        ])
+    elif direction == 'down':
+        # Define the original corner points of the image
+        src_points = np.float32([
+            [0, 0],               # Top-left corner
+            [width, 0],           # Top-right corner
+            [0, height],          # Bottom-left corner
+            [width, height]       # Bottom-right corner
+        ])
 
-    # Define the original corner points of the image
-    src_points = np.float32([
-        [0, 0],  # Top-left corner
-        [width, 0],  # Top-right corner
-        [0, height],  # Bottom-left corner
-        [width, height]  # Bottom-right corner
-    ])
-
-    # Define the new corner points for a subtle right-to-left tilt
-    dst_points = np.float32([
-        [0, 0],  # Slight inward shift for top-left
-        [width, 13],  # Slight downward shift for top-right
-        [0, height],  # Slight inward shift for bottom-left
-        [width, height - 13]  # Slight upward shift for bottom-right
-    ])
-
-    # Compute the perspective transformation matrix
-    matrix = cv2.getPerspectiveTransform(src_points, dst_points)
-
-    # Apply the perspective warp
-    warped_image = cv2.warpPerspective(img, matrix, (width, height))
-    return warped_image
-
-def do_up_in_depth(img):
-    # Get the dimensions of the image
-    height, width = img.shape[:2]
-
-    # Define the original corner points of the image
-    src_points = np.float32([
-        [0, 0],  # Top-left corner
-        [width, 0],  # Top-right corner
-        [0, height],  # Bottom-left corner
-        [width, height]  # Bottom-right corner
-    ])
-
-    # Define the new corner points to simulate a tilted perspective
-    # Make the top part appear closer and the bottom part farther
-    dst_points = np.float32([
-        [50, 0],  # Top-left moved inward
-        [width - 50, 0],  # Top-right moved inward
-        [0, height],  # Bottom-left remains the same
-        [width, height]  # Bottom-right remains the same
-    ])
-
-    # Compute the perspective transformation matrix
-    matrix = cv2.getPerspectiveTransform(src_points, dst_points)
-
-    # Apply the perspective warp
-    warped_image = cv2.warpPerspective(img, matrix, (width, height))
-    return warped_image
-
-
-def do_down_in_depth(img):
-    # Get the dimensions of the image
-    height, width = img.shape[:2]
-
-    # Define the original corner points of the image
-    src_points = np.float32([
-        [0, 0],  # Top-left corner
-        [width, 0],  # Top-right corner
-        [0, height],  # Bottom-left corner
-        [width, height]  # Bottom-right corner
-    ])
-
-    # Define the new corner points to simulate a tilted perspective
-    # Make the top part appear closer and the bottom part farther
-    dst_points = np.float32([
-        [0, 0],  # Top-left moved inward
-        [width, 0],  # Top-right moved inward
-        [50, height],  # Bottom-left remains the same
-        [width - 50, height]  # Bottom-right remains the same
-    ])
+        # Define the new corner points to simulate a tilted perspective
+        # Make the top part appear closer and the bottom part farther
+        dst_points = np.float32([
+            [0, 0],               # Top-left remains the same
+            [width, 0],           # Top-right remains the same
+            [50, height],         # Bottom-left moved inward
+            [width - 50, height]  # Bottom-right moved inward
+        ])
+    else:
+        raise ValueError("direction must be 'left', 'right', 'up' or 'down'")
 
     # Compute the perspective transformation matrix
     matrix = cv2.getPerspectiveTransform(src_points, dst_points)
@@ -254,6 +230,7 @@ def return_shuffled_channels(img, channels_order):
     img_sh[:,:,2]= img[:,:,channels_order[2]]
     return img_sh
 
+# TODO: Refactor into one {{{
 def return_binary_image_with_red_textlines(img_bin):
     img_red = np.copy(img_bin)
 
@@ -308,6 +285,8 @@ def return_image_with_red_elements(img, img_bin):
     img_final[:,:,1][img_bin[:,:,0]==0] = 0
     img_final[:,:,2][img_bin[:,:,0]==0] = 255
     return img_final
+
+# }}}
 
 def shift_image_and_label(img, label, type_shift):
     h_n = int(img.shape[0]*1.06)
@@ -513,9 +492,12 @@ def bluring(img_in, kind):
         img_blur = cv2.medianBlur(img_in, 5)
     elif kind == 'blur':
         img_blur = cv2.blur(img_in, (5, 5))
+    else:
+        raise ValueError("kind must be 'gauss', 'median' or 'blur'")
 
     return img_blur
 
+# TODO: document where this is from
 def elastic_transform(image, alpha, sigma, seedj, random_state=None):
    """Elastic deformation of images as described in [Simard2003]_.
    ..
[Simard2003] Simard, Steinkraus and Platt, "Best Practices for @@ -538,6 +520,7 @@ def elastic_transform(image, alpha, sigma, seedj, random_state=None): return distored_image.reshape(image.shape) +# TODO: Use one of the utils/rotate.py functions for this def rotation_90(img): img_rot = np.zeros((img.shape[1], img.shape[0], img.shape[2])) img_rot[:, :, 0] = img[:, :, 0].T @@ -546,6 +529,8 @@ def rotation_90(img): return img_rot +# TODO: document where this is from +# TODO: Use one of the utils/rotate.py functions for this def rotatedRectWithMaxArea(w, h, angle): """ Given a rectangle of size wxh that has been rotated by 'angle' (in @@ -574,6 +559,7 @@ def rotatedRectWithMaxArea(w, h, angle): return wr, hr +# TODO: Use one of the utils/rotate.py functions for this def rotate_max_area(image, rotated, rotated_label, angle): """ image: cv2 image matrix object angle: in degree @@ -587,6 +573,7 @@ def rotate_max_area(image, rotated, rotated_label, angle): x2 = x1 + int(wr) return rotated[y1:y2, x1:x2], rotated_label[y1:y2, x1:x2] +# TODO: Use one of the utils/rotate.py functions for this def rotate_max_area_single_image(image, rotated, angle): """ image: cv2 image matrix object angle: in degree @@ -600,12 +587,14 @@ def rotate_max_area_single_image(image, rotated, angle): x2 = x1 + int(wr) return rotated[y1:y2, x1:x2] +# TODO: Use one of the utils/rotate.py functions for this def rotation_not_90_func(img, label, thetha): rotated = imutils.rotate(img, thetha) rotated_label = imutils.rotate(label, thetha) return rotate_max_area(img, rotated, rotated_label, thetha) +# TODO: Use one of the utils/rotate.py functions for this def rotation_not_90_func_single_image(img, thetha): rotated = imutils.rotate(img, thetha) return rotate_max_area_single_image(img, rotated, thetha) @@ -628,6 +617,7 @@ def color_images(seg, n_classes): return seg_img +# TODO: use resize_image from utils def resize_image(seg_in, input_height, input_width): return cv2.resize(seg_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) @@ -640,6 +630,7 @@ def get_one_hot(seg, input_height, input_width, n_classes): return seg_f +# TODO: document where this is from def IoU(Yi, y_predi): ## mean Intersection over Union ## Mean IoU = TP/(FN + TP + FP) @@ -739,6 +730,7 @@ def data_gen(img_folder, mask_folder, batch_size, input_height, input_width, n_c yield img, mask +# TODO: Use otsu_copy from utils def otsu_copy(img): img_r = np.zeros(img.shape) img1 = img[:, :, 0] @@ -796,14 +788,13 @@ def get_patches(dir_img_f, dir_seg_f, img, label, height, width, indexer): return indexer -def do_padding_white(img): - img_org_h = img.shape[0] - img_org_w = img.shape[1] - +def do_padding_with_color(img, padding_color='black'): index_start_h = 4 index_start_w = 4 - img_padded = np.zeros((img.shape[0] + 2*index_start_h, img.shape[1]+ 2*index_start_w, img.shape[2])) + 255 + img_padded = np.zeros((img.shape[0] + 2*index_start_h, img.shape[1]+ 2*index_start_w, img.shape[2])) + if padding_color == 'white': + img_padded += 255 img_padded[index_start_h: index_start_h + img.shape[0], index_start_w: index_start_w + img.shape[1], :] = img[:, :, :] return img_padded.astype(float) @@ -817,20 +808,7 @@ def do_degrading(img, scale): return resize_image(img_res, img_org_h, img_org_w) - -def do_padding_black(img): - img_org_h = img.shape[0] - img_org_w = img.shape[1] - - index_start_h = 4 - index_start_w = 4 - - img_padded = np.zeros((img.shape[0] + 2*index_start_h, img.shape[1] + 2*index_start_w, img.shape[2])) - img_padded[index_start_h: index_start_h + 
img.shape[0], index_start_w: index_start_w + img.shape[1], :] = img[:, :, :] - - return img_padded.astype(float) - - +# TODO: How is this different from do_padding_black? def do_padding_label(img): img_org_h = img.shape[0] img_org_w = img.shape[1] @@ -867,57 +845,6 @@ def do_padding(img, label, height, width): return img_new,label_new -def get_patches_num_scale(dir_img_f, dir_seg_f, img, label, height, width, indexer, n_patches, scaler): - if img.shape[0] < height or img.shape[1] < width: - img, label = do_padding(img, label, height, width) - - img_h = img.shape[0] - img_w = img.shape[1] - - height_scale = int(height * scaler) - width_scale = int(width * scaler) - - - nxf = img_w / float(width_scale) - nyf = img_h / float(height_scale) - - if nxf > int(nxf): - nxf = int(nxf) + 1 - if nyf > int(nyf): - nyf = int(nyf) + 1 - - nxf = int(nxf) - nyf = int(nyf) - - for i in range(nxf): - for j in range(nyf): - index_x_d = i * width_scale - index_x_u = (i + 1) * width_scale - - index_y_d = j * height_scale - index_y_u = (j + 1) * height_scale - - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - width_scale - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - height_scale - - - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_patch = label[index_y_d:index_y_u, index_x_d:index_x_u, :] - - img_patch = resize_image(img_patch, height, width) - label_patch = resize_image(label_patch, height, width) - - cv2.imwrite(dir_img_f + '/img_' + str(indexer) + '.png', img_patch) - cv2.imwrite(dir_seg_f + '/img_' + str(indexer) + '.png', label_patch) - indexer += 1 - - return indexer - - def get_patches_num_scale_new(dir_img_f, dir_seg_f, img, label, height, width, indexer, scaler): img = resize_image(img, int(img.shape[0] * scaler), int(img.shape[1] * scaler)) label = resize_image(label, int(label.shape[0] * scaler), int(label.shape[1] * scaler)) @@ -967,13 +894,55 @@ def get_patches_num_scale_new(dir_img_f, dir_seg_f, img, label, height, width, i return indexer -def provide_patches(imgs_list_train, segs_list_train, dir_img, dir_seg, dir_flow_train_imgs, - dir_flow_train_labels, input_height, input_width, blur_k, blur_aug, - padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling, scaling, shifting, degrading, - brightening, scales, degrade_scales, brightness, flip_index, shuffle_indexes, - scaling_bluring, scaling_brightness, scaling_binarization, rotation, - rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=False, dir_img_bin=None,number_of_backgrounds_per_image=None,list_all_possible_background_images=None, dir_rgb_backgrounds=None, dir_rgb_foregrounds=None, list_all_possible_foreground_rgbs=None): +# TODO: (far) too many args +# TODO: refactor to combine with data_gen_ocr +def provide_patches( + imgs_list_train, + segs_list_train, + dir_img, + dir_seg, + dir_flow_train_imgs, + dir_flow_train_labels, + input_height, + input_width, + blur_k, + blur_aug, + padding_white, + padding_black, + flip_aug, + binarization, + adding_rgb_background, + adding_rgb_foreground, + add_red_textlines, + channels_shuffling, + scaling, + shifting, + degrading, + brightening, + scales, + degrade_scales, + brightness, + flip_index, + shuffle_indexes, + scaling_bluring, + scaling_brightness, + scaling_binarization, + rotation, + rotation_not_90, + thetha, + scaling_flip, + task, + augmentation=False, + patches=False, + dir_img_bin=None, + number_of_backgrounds_per_image=None, + 
list_all_possible_background_images=None,
+    dir_rgb_backgrounds=None,
+    dir_rgb_foregrounds=None,
+    list_all_possible_foreground_rgbs=None,
+):
+    # TODO: why separate var if you have seg_i?
     indexer = 0
     for im, seg_i in tqdm(zip(imgs_list_train, segs_list_train)):
         img_name = os.path.splitext(im)[0]
@@ -1282,26 +1251,88 @@ def provide_patches(imgs_list_train, segs_list_train, dir_img, dir_seg, dir_flow
                     for sc_ind in scales:
                         for f_i in flip_index:
                             indexer = get_patches_num_scale_new(dir_flow_train_imgs, dir_flow_train_labels,
+                                cv2.flip(cv2.imread(dir_img + '/' + im), f_i),
                                 cv2.flip(cv2.imread(dir_of_label_file), f_i),
                                 input_height, input_width, indexer=indexer, scaler=sc_ind)
 
-def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir_train, ls_files_images,
-        augmentation, color_padding_rotation, rotation_not_90, blur_aug, degrading, bin_deg, brightening, padding_white,
-        adding_rgb_foreground, adding_rgb_background, binarization, image_inversion, channels_shuffling, add_red_textlines, white_noise_strap,
-        textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth,
-        textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin,
-        pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, thetha_padd, brightness, padd_colors,
-        shuffle_indexes, pepper_indexes, skewing_amplitudes, blur_k, char_to_num, list_all_possible_background_images,
-        list_all_possible_foreground_rgbs, dir_rgb_backgrounds, dir_rgb_foregrounds, white_padds, dir_img_bin=None):
+def data_gen_ocr(
+    padding_token,
+    n_batch,
+    input_height,
+    input_width,
+    max_len,
+    dir_train,
+    ls_files_images,
+    augmentation,
+    color_padding_rotation,
+    rotation_not_90,
+    blur_aug,
+    degrading,
+    bin_deg,
+    brightening,
+    padding_white,
+    adding_rgb_foreground,
+    adding_rgb_background,
+    binarization,
+    image_inversion,
+    channels_shuffling,
+    add_red_textlines,
+    white_noise_strap,
+    textline_skewing,
+    textline_skewing_bin,
+    textline_left_in_depth,
+    textline_left_in_depth_bin,
+    textline_right_in_depth,
+    textline_right_in_depth_bin,
+    textline_up_in_depth,
+    textline_up_in_depth_bin,
+    textline_down_in_depth,
+    textline_down_in_depth_bin,
+    pepper_bin_aug,
+    pepper_aug,
+    degrade_scales,
+    number_of_backgrounds_per_image,
+    thetha,
+    thetha_padd,
+    brightness,
+    padd_colors,
+    shuffle_indexes,
+    pepper_indexes,
+    skewing_amplitudes,
+    blur_k,
+    char_to_num,
+    list_all_possible_background_images,
+    list_all_possible_foreground_rgbs,
+    dir_rgb_backgrounds,
+    dir_rgb_foregrounds,
+    white_padds,
+    dir_img_bin=None,
+):
     random.shuffle(ls_files_images)
     ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32)
     ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token
     batchcount = 0
+
+    # Closes over txt_inp, char_to_num etc. from the enclosing loop: scales and
+    # pads the given image into the batch buffers and returns the refreshed
+    # state plus a full batch dict once n_batch samples have been collected.
+    def increment_batchcount(img_out, batchcount, ret_x, ret_y):
+        to_yield = None
+        img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)
+        ret_x[batchcount, :,:,:] = img_out[:,:,:]
+        ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)
+        batchcount += 1
+        if batchcount>=n_batch:
+            ret_x = ret_x/255.
+            to_yield = {"image": ret_x, "label": ret_y}
+            ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32)
+            ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token
+            batchcount = 0
+        return img_out, batchcount, ret_x, ret_y, to_yield
+
+    # TODO: Why while True + yield, why not return a list?
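
# Reviewer note, not part of the patch: presumably the while True/yield below keeps
# data_gen_ocr an endless generator that Keras can draw batches from. A minimal
# sketch of how the new increment_batchcount helper is meant to be driven (img,
# txt_inp and the batch buffers come from the enclosing data_gen_ocr scope):
#
#     img_out, batchcount, ret_x, ret_y, to_yield = \
#         increment_batchcount(img, batchcount, ret_x, ret_y)
#     if to_yield:         # a full batch of n_batch samples is ready
#         yield to_yield   # {"image": ret_x / 255., "label": ret_y}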
while True: for i in ls_files_images: f_name = i.split('.')[0] @@ -1316,153 +1347,65 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir if augmentation: - img_out = scale_padd_image_for_ocr(img, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if color_padding_rotation: - for index, thetha_ind in enumerate(thetha_padd): + for thetha_ind in thetha_padd: for padd_col in padd_colors: img_out = rotation_not_90_func_single_image(do_padding_for_ocr(img, 1.2, padd_col), thetha_ind) - - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if rotation_not_90: - for index, thetha_ind in enumerate(thetha): + for thetha_ind in thetha: img_out = rotation_not_90_func_single_image(img, thetha_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if blur_aug: - for index, blur_type in enumerate(blur_k): + for blur_type in blur_k: img_out = bluring(img, blur_type) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield + if degrading: - for index, deg_scale_ind in enumerate(degrade_scales): + for deg_scale_ind in degrade_scales: try: img_out = do_degrading(img, deg_scale_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if bin_deg: - for index, deg_scale_ind in enumerate(degrade_scales): + for deg_scale_ind in degrade_scales: try: img_out = do_degrading(img_bin_corr, deg_scale_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if brightening: - for index, bright_scale_ind in enumerate(brightness): + for bright_scale_ind in brightness: try: + # FIXME: dir_img is not defined in this scope, will always fail img_out = do_brightening(dir_img, bright_scale_ind) + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if padding_white: - for index, padding_size in enumerate(white_padds): + for padding_size in white_padds: for padd_col in padd_colors: img_out = do_padding_for_ocr(img, padding_size, padd_col) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if adding_rgb_foreground: for i_n in range(number_of_backgrounds_per_image): @@ -1472,448 +1415,252 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir img_rgb_background_chosen = cv2.imread(dir_rgb_backgrounds + '/' + background_image_chosen_name) foreground_rgb_chosen = np.load(dir_rgb_foregrounds + '/' + foreground_rgb_chosen_name) - img_with_overlayed_background = return_binary_image_with_given_rgb_background_and_given_foreground_rgb(img_bin_corr, img_rgb_background_chosen, foreground_rgb_chosen) + img_out = return_binary_image_with_given_rgb_background_and_given_foreground_rgb(img_bin_corr, img_rgb_background_chosen, foreground_rgb_chosen) - img_out = scale_padd_image_for_ocr(img_with_overlayed_background, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if adding_rgb_background: for i_n in range(number_of_backgrounds_per_image): background_image_chosen_name = random.choice(list_all_possible_background_images) img_rgb_background_chosen = cv2.imread(dir_rgb_backgrounds + '/' + background_image_chosen_name) - img_with_overlayed_background = return_binary_image_with_given_rgb_background(img_bin_corr, img_rgb_background_chosen) - - img_out = scale_padd_image_for_ocr(img_with_overlayed_background, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out = return_binary_image_with_given_rgb_background(img_bin_corr, img_rgb_background_chosen) + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if binarization: img_out = scale_padd_image_for_ocr(img_bin_corr, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if image_inversion: img_out = invert_image(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :, :, :] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x = np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y = np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if channels_shuffling: for shuffle_index in shuffle_indexes: img_out = return_shuffled_channels(img, shuffle_index) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if add_red_textlines: - img_red_context = return_image_with_red_elements(img, img_bin_corr) - - img_out = scale_padd_image_for_ocr(img_red_context, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out = return_image_with_red_elements(img, img_bin_corr) + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if white_noise_strap: img_out = return_image_with_strapped_white_noises(img) - - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_skewing: - for index, des_scale_ind in enumerate(skewing_amplitudes): + for des_scale_ind in skewing_amplitudes: try: img_out = do_deskewing(img, des_scale_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_skewing_bin: - for index, des_scale_ind in enumerate(skewing_amplitudes): + for des_scale_ind in skewing_amplitudes: try: img_out = do_deskewing(img_bin_corr, des_scale_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_left_in_depth: try: - img_out = do_left_in_depth(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img, 'left') + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_left_in_depth_bin: try: - img_out = do_left_in_depth(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'left') + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_right_in_depth: try: - img_out = do_right_in_depth(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'right') + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - if textline_right_in_depth_bin: try: - img_out = do_right_in_depth(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'right') + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_up_in_depth: try: - img_out = do_up_in_depth(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img, 'up') + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_up_in_depth_bin: try: - img_out = do_up_in_depth(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'up') + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_down_in_depth: try: - img_out = do_down_in_depth(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img, 'down') + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_down_in_depth_bin: try: - img_out = do_down_in_depth(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'down') + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if pepper_bin_aug: - for index, pepper_ind in enumerate(pepper_indexes): + for pepper_ind in pepper_indexes: img_out = add_salt_and_pepper_noise(img_bin_corr, pepper_ind, pepper_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if pepper_aug: - for index, pepper_ind in enumerate(pepper_indexes): + for pepper_ind in pepper_indexes: img_out = add_salt_and_pepper_noise(img, pepper_ind, pepper_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield else: - - img_out = scale_padd_image_for_ocr(img, input_height, input_width) - ret_x[batchcount, :,:,:] = img_out[:,:,:] - - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield -def return_multiplier_based_on_augmnentations(augmentation, color_padding_rotation, rotation_not_90, blur_aug, - degrading, bin_deg, brightening, padding_white,adding_rgb_foreground, adding_rgb_background, binarization, image_inversion, channels_shuffling, add_red_textlines, white_noise_strap, - textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, thetha_padd, brightness, padd_colors, shuffle_indexes, pepper_indexes, skewing_amplitudes, blur_k, white_padds): +# TODO: what is aug_multip and why calculate it in this way +def return_multiplier_based_on_augmnentations( + augmentation, + color_padding_rotation, + rotation_not_90, + blur_aug, + degrading, + bin_deg, + brightening, + padding_white, + adding_rgb_foreground, + adding_rgb_background, + binarization, + image_inversion, + channels_shuffling, + add_red_textlines, + white_noise_strap, + textline_skewing, + textline_skewing_bin, + textline_left_in_depth, + textline_left_in_depth_bin, + textline_right_in_depth, + textline_right_in_depth_bin, + textline_up_in_depth, + textline_up_in_depth_bin, + textline_down_in_depth, + textline_down_in_depth_bin, + pepper_bin_aug, + pepper_aug, + degrade_scales, + number_of_backgrounds_per_image, + thetha, + thetha_padd, + brightness, + padd_colors, + shuffle_indexes, + pepper_indexes, + skewing_amplitudes, + blur_k, + white_padds, +): aug_multip = 1 + if not augmentation: + return 1 - if augmentation: - if binarization: - aug_multip = aug_multip + 1 - if image_inversion: - aug_multip = aug_multip + 1 - if 
add_red_textlines: - aug_multip = aug_multip + 1 - if white_noise_strap: - aug_multip = aug_multip + 1 - if textline_right_in_depth: - aug_multip = aug_multip + 1 - if textline_left_in_depth: - aug_multip = aug_multip + 1 - if textline_up_in_depth: - aug_multip = aug_multip + 1 - if textline_down_in_depth: - aug_multip = aug_multip + 1 - if textline_right_in_depth_bin: - aug_multip = aug_multip + 1 - if textline_left_in_depth_bin: - aug_multip = aug_multip + 1 - if textline_up_in_depth_bin: - aug_multip = aug_multip + 1 - if textline_down_in_depth_bin: - aug_multip = aug_multip + 1 - if adding_rgb_foreground: - aug_multip = aug_multip + number_of_backgrounds_per_image - if adding_rgb_background: - aug_multip = aug_multip + number_of_backgrounds_per_image - if bin_deg: - aug_multip = aug_multip + len(degrade_scales) - if degrading: - aug_multip = aug_multip + len(degrade_scales) - if rotation_not_90: - aug_multip = aug_multip + len(thetha) - if textline_skewing: - aug_multip = aug_multip + len(skewing_amplitudes) - if textline_skewing_bin: - aug_multip = aug_multip + len(skewing_amplitudes) - if color_padding_rotation: - aug_multip = aug_multip + len(thetha_padd)*len(padd_colors) - if channels_shuffling: - aug_multip = aug_multip + len(shuffle_indexes) - if blur_aug: - aug_multip = aug_multip + len(blur_k) - if brightening: - aug_multip = aug_multip + len(brightness) - if padding_white: - aug_multip = aug_multip + len(white_padds)*len(padd_colors) - if pepper_aug: - aug_multip = aug_multip + len(pepper_indexes) - if pepper_bin_aug: - aug_multip = aug_multip + len(pepper_indexes) + if binarization: + aug_multip += 1 + if image_inversion: + aug_multip += 1 + if add_red_textlines: + aug_multip += 1 + if white_noise_strap: + aug_multip += 1 + if textline_right_in_depth: + aug_multip += 1 + if textline_left_in_depth: + aug_multip += 1 + if textline_up_in_depth: + aug_multip += 1 + if textline_down_in_depth: + aug_multip += 1 + if textline_right_in_depth_bin: + aug_multip += 1 + if textline_left_in_depth_bin: + aug_multip += 1 + if textline_up_in_depth_bin: + aug_multip += 1 + if textline_down_in_depth_bin: + aug_multip += 1 + if adding_rgb_foreground: + aug_multip += number_of_backgrounds_per_image + if adding_rgb_background: + aug_multip += number_of_backgrounds_per_image + if bin_deg: + aug_multip += len(degrade_scales) + if degrading: + aug_multip += len(degrade_scales) + if rotation_not_90: + aug_multip += len(thetha) + if textline_skewing: + aug_multip += len(skewing_amplitudes) + if textline_skewing_bin: + aug_multip += len(skewing_amplitudes) + if color_padding_rotation: + aug_multip += len(thetha_padd)*len(padd_colors) + if channels_shuffling: + aug_multip += len(shuffle_indexes) + if blur_aug: + aug_multip += len(blur_k) + if brightening: + aug_multip += len(brightness) + if padding_white: + aug_multip += len(white_padds)*len(padd_colors) + if pepper_aug: + aug_multip += len(pepper_indexes) + if pepper_bin_aug: + aug_multip += len(pepper_indexes) return aug_multip
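
# Reviewer note, not part of the patch: aug_multip appears to be the number of
# samples data_gen_ocr emits per source line image, so callers can size an epoch
# from it. A hypothetical consumer (names and call sites assumed, not shown in
# this diff):
#
#     aug_multip = return_multiplier_based_on_augmnentations(augmentation, ...)
#     steps_per_epoch = (len(ls_files_images) * aug_multip) // n_batch
#     model.fit(data_gen_ocr(...), steps_per_epoch=steps_per_epoch, epochs=n_epochs)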