diff --git a/src/eynollah/training/metrics.py b/src/eynollah/training/metrics.py index cd30b02..a8f47d7 100644 --- a/src/eynollah/training/metrics.py +++ b/src/eynollah/training/metrics.py @@ -147,6 +147,7 @@ def generalized_dice_loss(y_true, y_pred): return 1 - generalized_dice_coeff2(y_true, y_pred) +# TODO: document where this is from def soft_dice_loss(y_true, y_pred, epsilon=1e-6): """ Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions. @@ -175,6 +176,7 @@ def soft_dice_loss(y_true, y_pred, epsilon=1e-6): return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch +# TODO: document where this is from def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last=True, mean_per_class=False, verbose=False): """ @@ -267,6 +269,8 @@ def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last=T return K.mean(non_zero_sum / non_zero_count) +# TODO: document where this is from +# TODO: Why a different implementation than IoU from utils? def mean_iou(y_true, y_pred, **kwargs): """ Compute mean Intersection over Union of two segmentation masks, via Keras. @@ -311,6 +315,7 @@ def iou_vahid(y_true, y_pred): return K.mean(iou) +# TODO: copy from utils? def IoU_metric(Yi, y_predi): # mean Intersection over Union # Mean IoU = TP/(FN + TP + FP) @@ -337,6 +342,7 @@ def IoU_metric_keras(y_true, y_pred): return IoU_metric(y_true.eval(session=sess), y_pred.eval(session=sess)) +# TODO: unused, remove? def jaccard_distance_loss(y_true, y_pred, smooth=100): """ Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) diff --git a/src/eynollah/training/models.py b/src/eynollah/training/models.py index d1b0aa2..9030096 100644 --- a/src/eynollah/training/models.py +++ b/src/eynollah/training/models.py @@ -5,6 +5,8 @@ from tensorflow.keras.layers import * from tensorflow.keras import layers from tensorflow.keras.regularizers import l2 +from eynollah.patch_encoder import Patches, PatchEncoder + ##mlp_head_units = [512, 256]#[2048, 1024] ###projection_dim = 64 ##transformer_layers = 2#8 @@ -38,87 +40,6 @@ def mlp(x, hidden_units, dropout_rate): x = layers.Dropout(dropout_rate)(x) return x -class Patches(layers.Layer): - def __init__(self, patch_size_x, patch_size_y):#__init__(self, **kwargs):#:__init__(self, patch_size):#__init__(self, **kwargs): - super(Patches, self).__init__() - self.patch_size_x = patch_size_x - self.patch_size_y = patch_size_y - - def call(self, images): - #print(tf.shape(images)[1],'images') - #print(self.patch_size,'self.patch_size') - batch_size = tf.shape(images)[0] - patches = tf.image.extract_patches( - images=images, - sizes=[1, self.patch_size_y, self.patch_size_x, 1], - strides=[1, self.patch_size_y, self.patch_size_x, 1], - rates=[1, 1, 1, 1], - padding="VALID", - ) - #patch_dims = patches.shape[-1] - patch_dims = tf.shape(patches)[-1] - patches = tf.reshape(patches, [batch_size, -1, patch_dims]) - return patches - def get_config(self): - - config = super().get_config().copy() - config.update({ - 'patch_size_x': self.patch_size_x, - 'patch_size_y': self.patch_size_y, - }) - return config - -class Patches_old(layers.Layer): - def __init__(self, patch_size):#__init__(self, **kwargs):#:__init__(self, patch_size):#__init__(self, **kwargs): - super(Patches, self).__init__() - self.patch_size = patch_size - - def call(self, images): - #print(tf.shape(images)[1],'images') - #print(self.patch_size,'self.patch_size') - batch_size = tf.shape(images)[0] - patches = 
tf.image.extract_patches( - images=images, - sizes=[1, self.patch_size, self.patch_size, 1], - strides=[1, self.patch_size, self.patch_size, 1], - rates=[1, 1, 1, 1], - padding="VALID", - ) - patch_dims = patches.shape[-1] - #print(patches.shape,patch_dims,'patch_dims') - patches = tf.reshape(patches, [batch_size, -1, patch_dims]) - return patches - def get_config(self): - - config = super().get_config().copy() - config.update({ - 'patch_size': self.patch_size, - }) - return config - - -class PatchEncoder(layers.Layer): - def __init__(self, num_patches, projection_dim): - super(PatchEncoder, self).__init__() - self.num_patches = num_patches - self.projection = layers.Dense(units=projection_dim) - self.position_embedding = layers.Embedding( - input_dim=num_patches, output_dim=projection_dim - ) - - def call(self, patch): - positions = tf.range(start=0, limit=self.num_patches, delta=1) - encoded = self.projection(patch) + self.position_embedding(positions) - return encoded - def get_config(self): - - config = super().get_config().copy() - config.update({ - 'num_patches': self.num_patches, - 'projection': self.projection, - 'position_embedding': self.position_embedding, - }) - return config def one_side_pad(x): diff --git a/src/eynollah/training/train.py b/src/eynollah/training/train.py index c15a562..7a0cb3d 100644 --- a/src/eynollah/training/train.py +++ b/src/eynollah/training/train.py @@ -175,22 +175,94 @@ def config_params(): characters_txt_file = None # Directory of characters text file needed for cnn_rnn_ocr model training. The file ends with .txt @ex.automain -def run(_config, n_classes, n_epochs, input_height, - input_width, weight_decay, weighted_loss, - index_start, dir_of_start_model, is_loss_soft_dice, - n_batch, patches, augmentation, flip_aug, - blur_aug, padding_white, padding_black, scaling, shifting, degrading,channels_shuffling, - brightening, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, blur_k, scales, degrade_scales,shuffle_indexes, - brightness, dir_train, data_is_provided, scaling_bluring, - scaling_brightness, scaling_binarization, rotation, rotation_not_90, - thetha, thetha_padd, scaling_flip, continue_training, transformer_projection_dim, - transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_cnn_first, - transformer_patchsize_x, transformer_patchsize_y, - transformer_num_patches_xy, backbone_type, save_interval, flip_index, dir_eval, dir_output, - pretraining, learning_rate, task, f1_threshold_classification, classification_classes_name, dir_img_bin, number_of_backgrounds_per_image,dir_rgb_backgrounds, - dir_rgb_foregrounds, characters_txt_file, color_padding_rotation, bin_deg, image_inversion, white_noise_strap, textline_skewing, textline_skewing_bin, - textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, - textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, padd_colors, pepper_indexes, white_padds, skewing_amplitudes, max_len): +def run( + _config, + n_classes, + n_epochs, + input_height, + input_width, + weight_decay, + weighted_loss, + index_start, + dir_of_start_model, + is_loss_soft_dice, + n_batch, + patches, + augmentation, + flip_aug, + blur_aug, + padding_white, + padding_black, + scaling, + shifting, + degrading, + channels_shuffling, + brightening, + binarization, + adding_rgb_background, + adding_rgb_foreground, + add_red_textlines, + blur_k, + scales, + 
degrade_scales, + shuffle_indexes, + brightness, + dir_train, + data_is_provided, + scaling_bluring, + scaling_brightness, + scaling_binarization, + rotation, + rotation_not_90, + thetha, + thetha_padd, + scaling_flip, + continue_training, + transformer_projection_dim, + transformer_mlp_head_units, + transformer_layers, + transformer_num_heads, + transformer_cnn_first, + transformer_patchsize_x, + transformer_patchsize_y, + transformer_num_patches_xy, + backbone_type, + save_interval, + flip_index, + dir_eval, + dir_output, + pretraining, + learning_rate, + task, + f1_threshold_classification, + classification_classes_name, + dir_img_bin, + number_of_backgrounds_per_image, + dir_rgb_backgrounds, + dir_rgb_foregrounds, + characters_txt_file, + color_padding_rotation, + bin_deg, + image_inversion, + white_noise_strap, + textline_skewing, + textline_skewing_bin, + textline_left_in_depth, + textline_left_in_depth_bin, + textline_right_in_depth, + textline_right_in_depth_bin, + textline_up_in_depth, + textline_up_in_depth_bin, + textline_down_in_depth, + textline_down_in_depth_bin, + pepper_bin_aug, + pepper_aug, + padd_colors, + pepper_indexes, + white_padds, + skewing_amplitudes, + max_len, +): if dir_rgb_backgrounds: list_all_possible_background_images = os.listdir(dir_rgb_backgrounds) @@ -201,6 +273,10 @@ def run(_config, n_classes, n_epochs, input_height, list_all_possible_foreground_rgbs = os.listdir(dir_rgb_foregrounds) else: list_all_possible_foreground_rgbs = None + + dir_seg = None + weights = None + model = None if task == "segmentation" or task == "enhancement" or task == "binarization": if data_is_provided: @@ -285,6 +361,7 @@ def run(_config, n_classes, n_epochs, input_height, pass else: + assert dir_seg is not None for obj in os.listdir(dir_seg): try: label_obj = cv2.imread(dir_seg + '/' + obj) @@ -314,6 +391,8 @@ def run(_config, n_classes, n_epochs, input_height, model = load_model(dir_of_start_model, compile=True, custom_objects={'loss': weighted_categorical_crossentropy(weights)}) if not is_loss_soft_dice and not weighted_loss: model = load_model(dir_of_start_model , compile=True,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + else: + raise ValueError("backbone_type must be 'nontransformer' or 'transformer'") else: index_start = 0 if backbone_type=='nontransformer': @@ -348,6 +427,7 @@ def run(_config, n_classes, n_epochs, input_height, sys.exit(1) model = vit_resnet50_unet_transformer_before_cnn(n_classes, transformer_patchsize_x, transformer_patchsize_y, num_patches, transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_projection_dim, input_height, input_width, task, weight_decay, pretraining) + assert model is not None #if you want to see the model structure just uncomment model summary. 
model.summary() @@ -377,9 +457,7 @@ def run(_config, n_classes, n_epochs, input_height, ##score_best=[] ##score_best.append(0) - if save_interval: - save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) - + save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) if save_interval else None for i in tqdm(range(index_start, n_epochs + index_start)): if save_interval: @@ -459,8 +537,7 @@ def run(_config, n_classes, n_epochs, input_height, opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)#1e-4)#(lr_schedule) model.compile(optimizer=opt) - if save_interval: - save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) + save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) if save_interval else None for i in tqdm(range(index_start, n_epochs + index_start)): if save_interval: @@ -559,8 +636,7 @@ def run(_config, n_classes, n_epochs, input_height, model.compile(loss="binary_crossentropy", optimizer = opt_adam,metrics=['accuracy']) - if save_interval: - save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) + save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config) if save_interval else None for i in range(n_epochs): if save_interval: diff --git a/src/eynollah/training/utils.py b/src/eynollah/training/utils.py index c589957..0a92935 100644 --- a/src/eynollah/training/utils.py +++ b/src/eynollah/training/utils.py @@ -38,6 +38,7 @@ def scale_padd_image_for_ocr(img, height, width): img_fin[:,:width_new,:] = img_res[:,:,:] return img_fin +# TODO: document where this is from def add_salt_and_pepper_noise(img, salt_prob, pepper_prob): """ Add salt-and-pepper noise to an image. @@ -108,14 +109,17 @@ def do_padding_for_ocr(img, percent_height, padding_color): if padding_color == 'white': img_new = np.ones((height_new, width_new, img.shape[2])).astype(float) * 255 - if padding_color == 'black': + elif padding_color == 'black': img_new = np.zeros((height_new, width_new, img.shape[2])).astype(float) + else: + raise ValueError("padding_color must be 'white' or 'black'") img_new[h_start:h_start + img.shape[0], w_start:w_start + img.shape[1], :] = np.copy(img[:, :, :]) return img_new +# TODO: document where this is from def do_deskewing(img, amplitude): height, width = img.shape[:2] @@ -133,107 +137,79 @@ def do_deskewing(img, amplitude): curved_image = cv2.remap(img, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT) return curved_image -def do_left_in_depth(img): +# TODO: document where this is from +def do_direction_in_depth(img, direction: str): height, width = img.shape[:2] - # Define the original corner points of the image - src_points = np.float32([ - [0, 0], # Top-left corner - [width, 0], # Top-right corner - [0, height], # Bottom-left corner - [width, height] # Bottom-right corner - ]) + if direction == 'left': + # Define the original corner points of the image + src_points = np.float32([ + [0, 0], # Top-left corner + [width, 0], # Top-right corner + [0, height], # Bottom-left corner + [width, height] # Bottom-right corner + ]) - # Define the new corner points for a subtle right-to-left tilt - dst_points = np.float32([ - [2, 13], # Slight inward shift for top-left - [width, 0], # Slight downward shift for top-right - [2, height-13], # Slight inward shift for bottom-left - [width, height] # Slight upward shift for bottom-right - ]) + # Define the new corner points for a subtle right-to-left tilt + dst_points = 
np.float32([
+            [2, 13],              # Slight inward shift for top-left
+            [width, 0],           # Top-right unchanged
+            [2, height-13],       # Slight inward shift for bottom-left
+            [width, height]       # Bottom-right unchanged
+        ])
+    elif direction == 'right':
+        # Define the original corner points of the image
+        src_points = np.float32([
+            [0, 0],               # Top-left corner
+            [width, 0],           # Top-right corner
+            [0, height],          # Bottom-left corner
+            [width, height]       # Bottom-right corner
+        ])
 
-    # Compute the perspective transformation matrix
-    matrix = cv2.getPerspectiveTransform(src_points, dst_points)
+        # Define the new corner points for a subtle left-to-right tilt
+        dst_points = np.float32([
+            [0, 0],               # Top-left unchanged
+            [width, 13],          # Slight downward shift for top-right
+            [0, height],          # Bottom-left unchanged
+            [width, height - 13]  # Slight upward shift for bottom-right
+        ])
 
-    # Apply the perspective warp
-    warped_image = cv2.warpPerspective(img, matrix, (width, height))
-    return warped_image
+    elif direction == 'up':
+        # Define the original corner points of the image
+        src_points = np.float32([
+            [0, 0],               # Top-left corner
+            [width, 0],           # Top-right corner
+            [0, height],          # Bottom-left corner
+            [width, height]       # Bottom-right corner
+        ])
 
-def do_right_in_depth(img):
-    height, width = img.shape[:2]
+        # Define the new corner points to simulate a tilted perspective
+        # Make the top part appear closer and the bottom part farther
+        dst_points = np.float32([
+            [50, 0],              # Top-left moved inward
+            [width - 50, 0],      # Top-right moved inward
+            [0, height],          # Bottom-left remains the same
+            [width, height]       # Bottom-right remains the same
+        ])
+    elif direction == 'down':
+        # Define the original corner points of the image
+        src_points = np.float32([
+            [0, 0],               # Top-left corner
+            [width, 0],           # Top-right corner
+            [0, height],          # Bottom-left corner
+            [width, height]       # Bottom-right corner
+        ])
 
-    # Define the original corner points of the image
-    src_points = np.float32([
-        [0, 0],  # Top-left corner
-        [width, 0],  # Top-right corner
-        [0, height],  # Bottom-left corner
-        [width, height]  # Bottom-right corner
-    ])
-
-    # Define the new corner points for a subtle right-to-left tilt
-    dst_points = np.float32([
-        [0, 0],  # Slight inward shift for top-left
-        [width, 13],  # Slight downward shift for top-right
-        [0, height],  # Slight inward shift for bottom-left
-        [width, height - 13]  # Slight upward shift for bottom-right
-    ])
-
-    # Compute the perspective transformation matrix
-    matrix = cv2.getPerspectiveTransform(src_points, dst_points)
-
-    # Apply the perspective warp
-    warped_image = cv2.warpPerspective(img, matrix, (width, height))
-    return warped_image
-
-def do_up_in_depth(img):
-    # Get the dimensions of the image
-    height, width = img.shape[:2]
-
-    # Define the original corner points of the image
-    src_points = np.float32([
-        [0, 0],  # Top-left corner
-        [width, 0],  # Top-right corner
-        [0, height],  # Bottom-left corner
-        [width, height]  # Bottom-right corner
-    ])
-
-    # Define the new corner points to simulate a tilted perspective
-    # Make the top part appear closer and the bottom part farther
-    dst_points = np.float32([
-        [50, 0],  # Top-left moved inward
-        [width - 50, 0],  # Top-right moved inward
-        [0, height],  # Bottom-left remains the same
-        [width, height]  # Bottom-right remains the same
-    ])
-
-    # Compute the perspective transformation matrix
-    matrix = cv2.getPerspectiveTransform(src_points, dst_points)
-
-    # Apply the perspective warp
-    warped_image = cv2.warpPerspective(img, matrix, (width, height))
-    return warped_image
-
-
-def do_down_in_depth(img):
-    # Get the dimensions of the image
-    height, width = img.shape[:2]
-
-    # Define the original corner points of the image
-    src_points = np.float32([
-        [0, 0],  # Top-left corner
-        [width, 0],  # Top-right corner
-        [0, height],  # Bottom-left corner
-        [width, height]  # Bottom-right corner
-    ])
-
-    # Define the new corner points to simulate a tilted perspective
-    # Make the top part appear closer and the bottom part farther
-    dst_points = np.float32([
-        [0, 0],  # Top-left moved inward
-        [width, 0],  # Top-right moved inward
-        [50, height],  # Bottom-left remains the same
-        [width - 50, height]  # Bottom-right remains the same
-    ])
+        # Define the new corner points to simulate a tilted perspective
+        # Make the top part appear closer and the bottom part farther
+        dst_points = np.float32([
+            [0, 0],               # Top-left remains the same
+            [width, 0],           # Top-right remains the same
+            [50, height],         # Bottom-left moved inward
+            [width - 50, height]  # Bottom-right moved inward
+        ])
+    else:
+        raise ValueError("direction must be 'left', 'right', 'up' or 'down'")
 
     # Compute the perspective transformation matrix
     matrix = cv2.getPerspectiveTransform(src_points, dst_points)
@@ -254,6 +230,7 @@ def return_shuffled_channels(img, channels_order):
     img_sh[:,:,2]= img[:,:,channels_order[2]]
     return img_sh
 
+# TODO: Refactor into one {{{
 def return_binary_image_with_red_textlines(img_bin):
     img_red = np.copy(img_bin)
 
@@ -308,6 +285,8 @@ def return_image_with_red_elements(img, img_bin):
     img_final[:,:,1][img_bin[:,:,0]==0] = 0
     img_final[:,:,2][img_bin[:,:,0]==0] = 255
     return img_final
+
+# }}}
 
 def shift_image_and_label(img, label, type_shift):
     h_n = int(img.shape[0]*1.06)
@@ -513,9 +492,12 @@ def bluring(img_in, kind):
         img_blur = cv2.medianBlur(img_in, 5)
     elif kind == 'blur':
         img_blur = cv2.blur(img_in, (5, 5))
+    else:
+        raise ValueError("kind must be 'gauss', 'median' or 'blur'")
 
     return img_blur
 
+# TODO: document where this is from
 def elastic_transform(image, alpha, sigma, seedj, random_state=None):
    """Elastic deformation of images as described in [Simard2003]_.
    ..
[Simard2003] Simard, Steinkraus and Platt, "Best Practices for @@ -538,6 +520,7 @@ def elastic_transform(image, alpha, sigma, seedj, random_state=None): return distored_image.reshape(image.shape) +# TODO: Use one of the utils/rotate.py functions for this def rotation_90(img): img_rot = np.zeros((img.shape[1], img.shape[0], img.shape[2])) img_rot[:, :, 0] = img[:, :, 0].T @@ -546,6 +529,8 @@ def rotation_90(img): return img_rot +# TODO: document where this is from +# TODO: Use one of the utils/rotate.py functions for this def rotatedRectWithMaxArea(w, h, angle): """ Given a rectangle of size wxh that has been rotated by 'angle' (in @@ -574,6 +559,7 @@ def rotatedRectWithMaxArea(w, h, angle): return wr, hr +# TODO: Use one of the utils/rotate.py functions for this def rotate_max_area(image, rotated, rotated_label, angle): """ image: cv2 image matrix object angle: in degree @@ -587,6 +573,7 @@ def rotate_max_area(image, rotated, rotated_label, angle): x2 = x1 + int(wr) return rotated[y1:y2, x1:x2], rotated_label[y1:y2, x1:x2] +# TODO: Use one of the utils/rotate.py functions for this def rotate_max_area_single_image(image, rotated, angle): """ image: cv2 image matrix object angle: in degree @@ -600,12 +587,14 @@ def rotate_max_area_single_image(image, rotated, angle): x2 = x1 + int(wr) return rotated[y1:y2, x1:x2] +# TODO: Use one of the utils/rotate.py functions for this def rotation_not_90_func(img, label, thetha): rotated = imutils.rotate(img, thetha) rotated_label = imutils.rotate(label, thetha) return rotate_max_area(img, rotated, rotated_label, thetha) +# TODO: Use one of the utils/rotate.py functions for this def rotation_not_90_func_single_image(img, thetha): rotated = imutils.rotate(img, thetha) return rotate_max_area_single_image(img, rotated, thetha) @@ -628,6 +617,7 @@ def color_images(seg, n_classes): return seg_img +# TODO: use resize_image from utils def resize_image(seg_in, input_height, input_width): return cv2.resize(seg_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) @@ -640,6 +630,7 @@ def get_one_hot(seg, input_height, input_width, n_classes): return seg_f +# TODO: document where this is from def IoU(Yi, y_predi): ## mean Intersection over Union ## Mean IoU = TP/(FN + TP + FP) @@ -739,6 +730,7 @@ def data_gen(img_folder, mask_folder, batch_size, input_height, input_width, n_c yield img, mask +# TODO: Use otsu_copy from utils def otsu_copy(img): img_r = np.zeros(img.shape) img1 = img[:, :, 0] @@ -796,14 +788,13 @@ def get_patches(dir_img_f, dir_seg_f, img, label, height, width, indexer): return indexer -def do_padding_white(img): - img_org_h = img.shape[0] - img_org_w = img.shape[1] - +def do_padding_with_color(img, padding_color='black'): index_start_h = 4 index_start_w = 4 - img_padded = np.zeros((img.shape[0] + 2*index_start_h, img.shape[1]+ 2*index_start_w, img.shape[2])) + 255 + img_padded = np.zeros((img.shape[0] + 2*index_start_h, img.shape[1]+ 2*index_start_w, img.shape[2])) + if padding_color == 'white': + img_padded += 255 img_padded[index_start_h: index_start_h + img.shape[0], index_start_w: index_start_w + img.shape[1], :] = img[:, :, :] return img_padded.astype(float) @@ -817,20 +808,7 @@ def do_degrading(img, scale): return resize_image(img_res, img_org_h, img_org_w) - -def do_padding_black(img): - img_org_h = img.shape[0] - img_org_w = img.shape[1] - - index_start_h = 4 - index_start_w = 4 - - img_padded = np.zeros((img.shape[0] + 2*index_start_h, img.shape[1] + 2*index_start_w, img.shape[2])) - img_padded[index_start_h: index_start_h + 
img.shape[0], index_start_w: index_start_w + img.shape[1], :] = img[:, :, :] - - return img_padded.astype(float) - - +# TODO: How is this different from do_padding_black? def do_padding_label(img): img_org_h = img.shape[0] img_org_w = img.shape[1] @@ -867,57 +845,6 @@ def do_padding(img, label, height, width): return img_new,label_new -def get_patches_num_scale(dir_img_f, dir_seg_f, img, label, height, width, indexer, n_patches, scaler): - if img.shape[0] < height or img.shape[1] < width: - img, label = do_padding(img, label, height, width) - - img_h = img.shape[0] - img_w = img.shape[1] - - height_scale = int(height * scaler) - width_scale = int(width * scaler) - - - nxf = img_w / float(width_scale) - nyf = img_h / float(height_scale) - - if nxf > int(nxf): - nxf = int(nxf) + 1 - if nyf > int(nyf): - nyf = int(nyf) + 1 - - nxf = int(nxf) - nyf = int(nyf) - - for i in range(nxf): - for j in range(nyf): - index_x_d = i * width_scale - index_x_u = (i + 1) * width_scale - - index_y_d = j * height_scale - index_y_u = (j + 1) * height_scale - - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - width_scale - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - height_scale - - - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_patch = label[index_y_d:index_y_u, index_x_d:index_x_u, :] - - img_patch = resize_image(img_patch, height, width) - label_patch = resize_image(label_patch, height, width) - - cv2.imwrite(dir_img_f + '/img_' + str(indexer) + '.png', img_patch) - cv2.imwrite(dir_seg_f + '/img_' + str(indexer) + '.png', label_patch) - indexer += 1 - - return indexer - - def get_patches_num_scale_new(dir_img_f, dir_seg_f, img, label, height, width, indexer, scaler): img = resize_image(img, int(img.shape[0] * scaler), int(img.shape[1] * scaler)) label = resize_image(label, int(label.shape[0] * scaler), int(label.shape[1] * scaler)) @@ -967,13 +894,55 @@ def get_patches_num_scale_new(dir_img_f, dir_seg_f, img, label, height, width, i return indexer -def provide_patches(imgs_list_train, segs_list_train, dir_img, dir_seg, dir_flow_train_imgs, - dir_flow_train_labels, input_height, input_width, blur_k, blur_aug, - padding_white, padding_black, flip_aug, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, channels_shuffling, scaling, shifting, degrading, - brightening, scales, degrade_scales, brightness, flip_index, shuffle_indexes, - scaling_bluring, scaling_brightness, scaling_binarization, rotation, - rotation_not_90, thetha, scaling_flip, task, augmentation=False, patches=False, dir_img_bin=None,number_of_backgrounds_per_image=None,list_all_possible_background_images=None, dir_rgb_backgrounds=None, dir_rgb_foregrounds=None, list_all_possible_foreground_rgbs=None): +# TODO: (far) too many args +# TODO: refactor to combine with data_gen_ocr +def provide_patches( + imgs_list_train, + segs_list_train, + dir_img, + dir_seg, + dir_flow_train_imgs, + dir_flow_train_labels, + input_height, + input_width, + blur_k, + blur_aug, + padding_white, + padding_black, + flip_aug, + binarization, + adding_rgb_background, + adding_rgb_foreground, + add_red_textlines, + channels_shuffling, + scaling, + shifting, + degrading, + brightening, + scales, + degrade_scales, + brightness, + flip_index, + shuffle_indexes, + scaling_bluring, + scaling_brightness, + scaling_binarization, + rotation, + rotation_not_90, + thetha, + scaling_flip, + task, + augmentation=False, + patches=False, + dir_img_bin=None, + number_of_backgrounds_per_image=None, + 
list_all_possible_background_images=None,
+    dir_rgb_backgrounds=None,
+    dir_rgb_foregrounds=None,
+    list_all_possible_foreground_rgbs=None,
+):
+    # TODO: why separate var if you have seg_i?
     indexer = 0
     for im, seg_i in tqdm(zip(imgs_list_train, segs_list_train)):
         img_name = os.path.splitext(im)[0]
@@ -1282,26 +1251,88 @@ def provide_patches(imgs_list_train, segs_list_train, dir_img, dir_seg, dir_flow
                     for sc_ind in scales:
                         for f_i in flip_index:
                             indexer = get_patches_num_scale_new(dir_flow_train_imgs, dir_flow_train_labels,
+                                cv2.flip(cv2.imread(dir_img + '/' + im), f_i),
                                 cv2.flip(cv2.imread(dir_of_label_file), f_i),
                                 input_height, input_width, indexer=indexer, scaler=sc_ind)
 
-def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir_train, ls_files_images,
-        augmentation, color_padding_rotation, rotation_not_90, blur_aug, degrading, bin_deg, brightening, padding_white,
-        adding_rgb_foreground, adding_rgb_background, binarization, image_inversion, channels_shuffling, add_red_textlines, white_noise_strap,
-        textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth,
-        textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin,
-        pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, thetha_padd, brightness, padd_colors,
-        shuffle_indexes, pepper_indexes, skewing_amplitudes, blur_k, char_to_num, list_all_possible_background_images,
-        list_all_possible_foreground_rgbs, dir_rgb_backgrounds, dir_rgb_foregrounds, white_padds, dir_img_bin=None):
+def data_gen_ocr(
+    padding_token,
+    n_batch,
+    input_height,
+    input_width,
+    max_len,
+    dir_train,
+    ls_files_images,
+    augmentation,
+    color_padding_rotation,
+    rotation_not_90,
+    blur_aug,
+    degrading,
+    bin_deg,
+    brightening,
+    padding_white,
+    adding_rgb_foreground,
+    adding_rgb_background,
+    binarization,
+    image_inversion,
+    channels_shuffling,
+    add_red_textlines,
+    white_noise_strap,
+    textline_skewing,
+    textline_skewing_bin,
+    textline_left_in_depth,
+    textline_left_in_depth_bin,
+    textline_right_in_depth,
+    textline_right_in_depth_bin,
+    textline_up_in_depth,
+    textline_up_in_depth_bin,
+    textline_down_in_depth,
+    textline_down_in_depth_bin,
+    pepper_bin_aug,
+    pepper_aug,
+    degrade_scales,
+    number_of_backgrounds_per_image,
+    thetha,
+    thetha_padd,
+    brightness,
+    padd_colors,
+    shuffle_indexes,
+    pepper_indexes,
+    skewing_amplitudes,
+    blur_k,
+    char_to_num,
+    list_all_possible_background_images,
+    list_all_possible_foreground_rgbs,
+    dir_rgb_backgrounds,
+    dir_rgb_foregrounds,
+    white_padds,
+    dir_img_bin=None,
+):
     random.shuffle(ls_files_images)
     ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32)
     ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token
     batchcount = 0
+
+    # Closes over txt_inp, char_to_num etc. from the enclosing loop: scales and
+    # pads the given image into the batch buffers and returns the refreshed
+    # state plus a full batch dict once n_batch samples have been collected.
+    def increment_batchcount(img_out, batchcount, ret_x, ret_y):
+        to_yield = None
+        img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)
+        ret_x[batchcount, :,:,:] = img_out[:,:,:]
+        ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)
+        batchcount += 1
+        if batchcount>=n_batch:
+            ret_x = ret_x/255.
+            to_yield = {"image": ret_x, "label": ret_y}
+            ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32)
+            ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token
+            batchcount = 0
+        return img_out, batchcount, ret_x, ret_y, to_yield
+
+    # TODO: Why while True + yield, why not return a list?
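
# Reviewer note, not part of the patch: presumably the while True/yield below keeps
# data_gen_ocr an endless generator that Keras can draw batches from. A minimal
# sketch of how the new increment_batchcount helper is meant to be driven (img,
# txt_inp and the batch buffers come from the enclosing data_gen_ocr scope):
#
#     img_out, batchcount, ret_x, ret_y, to_yield = \
#         increment_batchcount(img, batchcount, ret_x, ret_y)
#     if to_yield:         # a full batch of n_batch samples is ready
#         yield to_yield   # {"image": ret_x / 255., "label": ret_y}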
while True: for i in ls_files_images: f_name = i.split('.')[0] @@ -1316,153 +1347,65 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir if augmentation: - img_out = scale_padd_image_for_ocr(img, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if color_padding_rotation: - for index, thetha_ind in enumerate(thetha_padd): + for thetha_ind in thetha_padd: for padd_col in padd_colors: img_out = rotation_not_90_func_single_image(do_padding_for_ocr(img, 1.2, padd_col), thetha_ind) - - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if rotation_not_90: - for index, thetha_ind in enumerate(thetha): + for thetha_ind in thetha: img_out = rotation_not_90_func_single_image(img, thetha_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if blur_aug: - for index, blur_type in enumerate(blur_k): + for blur_type in blur_k: img_out = bluring(img, blur_type) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield + if degrading: - for index, deg_scale_ind in enumerate(degrade_scales): + for deg_scale_ind in degrade_scales: try: img_out = do_degrading(img, deg_scale_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if bin_deg: - for index, deg_scale_ind in enumerate(degrade_scales): + for deg_scale_ind in degrade_scales: try: img_out = do_degrading(img_bin_corr, deg_scale_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if brightening: - for index, bright_scale_ind in enumerate(brightness): + for bright_scale_ind in brightness: try: + # FIXME: dir_img is not defined in this scope, will always fail img_out = do_brightening(dir_img, bright_scale_ind) + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if padding_white: - for index, padding_size in enumerate(white_padds): + for padding_size in white_padds: for padd_col in padd_colors: img_out = do_padding_for_ocr(img, padding_size, padd_col) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if adding_rgb_foreground: for i_n in range(number_of_backgrounds_per_image): @@ -1472,448 +1415,252 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir img_rgb_background_chosen = cv2.imread(dir_rgb_backgrounds + '/' + background_image_chosen_name) foreground_rgb_chosen = np.load(dir_rgb_foregrounds + '/' + foreground_rgb_chosen_name) - img_with_overlayed_background = return_binary_image_with_given_rgb_background_and_given_foreground_rgb(img_bin_corr, img_rgb_background_chosen, foreground_rgb_chosen) + img_out = return_binary_image_with_given_rgb_background_and_given_foreground_rgb(img_bin_corr, img_rgb_background_chosen, foreground_rgb_chosen) - img_out = scale_padd_image_for_ocr(img_with_overlayed_background, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if adding_rgb_background: for i_n in range(number_of_backgrounds_per_image): background_image_chosen_name = random.choice(list_all_possible_background_images) img_rgb_background_chosen = cv2.imread(dir_rgb_backgrounds + '/' + background_image_chosen_name) - img_with_overlayed_background = return_binary_image_with_given_rgb_background(img_bin_corr, img_rgb_background_chosen) - - img_out = scale_padd_image_for_ocr(img_with_overlayed_background, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out = return_binary_image_with_given_rgb_background(img_bin_corr, img_rgb_background_chosen) + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if binarization: img_out = scale_padd_image_for_ocr(img_bin_corr, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if image_inversion: img_out = invert_image(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :, :, :] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x = np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y = np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if channels_shuffling: for shuffle_index in shuffle_indexes: img_out = return_shuffled_channels(img, shuffle_index) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if add_red_textlines: - img_red_context = return_image_with_red_elements(img, img_bin_corr) - - img_out = scale_padd_image_for_ocr(img_red_context, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out = return_image_with_red_elements(img, img_bin_corr) + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if white_noise_strap: img_out = return_image_with_strapped_white_noises(img) - - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_skewing: - for index, des_scale_ind in enumerate(skewing_amplitudes): + for des_scale_ind in skewing_amplitudes: try: img_out = do_deskewing(img, des_scale_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_skewing_bin: - for index, des_scale_ind in enumerate(skewing_amplitudes): + for des_scale_ind in skewing_amplitudes: try: img_out = do_deskewing(img_bin_corr, des_scale_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_left_in_depth: try: - img_out = do_left_in_depth(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img, 'left') + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_left_in_depth_bin: try: - img_out = do_left_in_depth(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'left') + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_right_in_depth: try: - img_out = do_right_in_depth(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'right') + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - if textline_right_in_depth_bin: try: - img_out = do_right_in_depth(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'right') + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_up_in_depth: try: - img_out = do_up_in_depth(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img, 'up') + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_up_in_depth_bin: try: - img_out = do_up_in_depth(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'up') + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_down_in_depth: try: - img_out = do_down_in_depth(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img, 'down') + # TODO: qualify except except: img_out = np.copy(img) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if textline_down_in_depth_bin: try: - img_out = do_down_in_depth(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) + img_out = do_direction_in_depth(img_bin_corr, 'down') + # TODO: qualify except except: img_out = np.copy(img_bin_corr) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if pepper_bin_aug: - for index, pepper_ind in enumerate(pepper_indexes): + for pepper_ind in pepper_indexes: img_out = add_salt_and_pepper_noise(img_bin_corr, pepper_ind, pepper_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. 
- yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield if pepper_aug: - for index, pepper_ind in enumerate(pepper_indexes): + for pepper_ind in pepper_indexes: img_out = add_salt_and_pepper_noise(img, pepper_ind, pepper_ind) - img_out = scale_padd_image_for_ocr(img_out, input_height, input_width) - - ret_x[batchcount, :,:,:] = img_out[:,:,:] - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 - - + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield else: - - img_out = scale_padd_image_for_ocr(img, input_height, input_width) - ret_x[batchcount, :,:,:] = img_out[:,:,:] - - ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len) - - batchcount+=1 - - if batchcount>=n_batch: - ret_x = ret_x/255. - yield {"image": ret_x, "label": ret_y} - ret_x= np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32) - ret_y= np.zeros((n_batch, max_len)).astype(np.int16)+padding_token - batchcount = 0 + img_out, batchcount, ret_x, ret_y, to_yield = increment_batchcount(img_out, batchcount, ret_x, ret_y) + if to_yield: yield to_yield -def return_multiplier_based_on_augmnentations(augmentation, color_padding_rotation, rotation_not_90, blur_aug, - degrading, bin_deg, brightening, padding_white,adding_rgb_foreground, adding_rgb_background, binarization, image_inversion, channels_shuffling, add_red_textlines, white_noise_strap, - textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, thetha_padd, brightness, padd_colors, shuffle_indexes, pepper_indexes, skewing_amplitudes, blur_k, white_padds): +# TODO: what is aug_multip and why calculate it in this way +def return_multiplier_based_on_augmnentations( + augmentation, + color_padding_rotation, + rotation_not_90, + blur_aug, + degrading, + bin_deg, + brightening, + padding_white, + adding_rgb_foreground, + adding_rgb_background, + binarization, + image_inversion, + channels_shuffling, + add_red_textlines, + white_noise_strap, + textline_skewing, + textline_skewing_bin, + textline_left_in_depth, + textline_left_in_depth_bin, + textline_right_in_depth, + textline_right_in_depth_bin, + textline_up_in_depth, + textline_up_in_depth_bin, + textline_down_in_depth, + textline_down_in_depth_bin, + pepper_bin_aug, + pepper_aug, + degrade_scales, + number_of_backgrounds_per_image, + thetha, + thetha_padd, + brightness, + padd_colors, + shuffle_indexes, + pepper_indexes, + skewing_amplitudes, + blur_k, + white_padds, +): aug_multip = 1 + if not augmentation: + return 1 - if augmentation: - if binarization: - aug_multip = aug_multip + 1 - if image_inversion: - aug_multip = aug_multip + 1 - if 
add_red_textlines: - aug_multip = aug_multip + 1 - if white_noise_strap: - aug_multip = aug_multip + 1 - if textline_right_in_depth: - aug_multip = aug_multip + 1 - if textline_left_in_depth: - aug_multip = aug_multip + 1 - if textline_up_in_depth: - aug_multip = aug_multip + 1 - if textline_down_in_depth: - aug_multip = aug_multip + 1 - if textline_right_in_depth_bin: - aug_multip = aug_multip + 1 - if textline_left_in_depth_bin: - aug_multip = aug_multip + 1 - if textline_up_in_depth_bin: - aug_multip = aug_multip + 1 - if textline_down_in_depth_bin: - aug_multip = aug_multip + 1 - if adding_rgb_foreground: - aug_multip = aug_multip + number_of_backgrounds_per_image - if adding_rgb_background: - aug_multip = aug_multip + number_of_backgrounds_per_image - if bin_deg: - aug_multip = aug_multip + len(degrade_scales) - if degrading: - aug_multip = aug_multip + len(degrade_scales) - if rotation_not_90: - aug_multip = aug_multip + len(thetha) - if textline_skewing: - aug_multip = aug_multip + len(skewing_amplitudes) - if textline_skewing_bin: - aug_multip = aug_multip + len(skewing_amplitudes) - if color_padding_rotation: - aug_multip = aug_multip + len(thetha_padd)*len(padd_colors) - if channels_shuffling: - aug_multip = aug_multip + len(shuffle_indexes) - if blur_aug: - aug_multip = aug_multip + len(blur_k) - if brightening: - aug_multip = aug_multip + len(brightness) - if padding_white: - aug_multip = aug_multip + len(white_padds)*len(padd_colors) - if pepper_aug: - aug_multip = aug_multip + len(pepper_indexes) - if pepper_bin_aug: - aug_multip = aug_multip + len(pepper_indexes) + if binarization: + aug_multip += 1 + if image_inversion: + aug_multip += 1 + if add_red_textlines: + aug_multip += 1 + if white_noise_strap: + aug_multip += 1 + if textline_right_in_depth: + aug_multip += 1 + if textline_left_in_depth: + aug_multip += 1 + if textline_up_in_depth: + aug_multip += 1 + if textline_down_in_depth: + aug_multip += 1 + if textline_right_in_depth_bin: + aug_multip += 1 + if textline_left_in_depth_bin: + aug_multip += 1 + if textline_up_in_depth_bin: + aug_multip += 1 + if textline_down_in_depth_bin: + aug_multip += 1 + if adding_rgb_foreground: + aug_multip += number_of_backgrounds_per_image + if adding_rgb_background: + aug_multip += number_of_backgrounds_per_image + if bin_deg: + aug_multip += len(degrade_scales) + if degrading: + aug_multip += len(degrade_scales) + if rotation_not_90: + aug_multip += len(thetha) + if textline_skewing: + aug_multip += len(skewing_amplitudes) + if textline_skewing_bin: + aug_multip += len(skewing_amplitudes) + if color_padding_rotation: + aug_multip += len(thetha_padd)*len(padd_colors) + if channels_shuffling: + aug_multip += len(shuffle_indexes) + if blur_aug: + aug_multip += len(blur_k) + if brightening: + aug_multip += len(brightness) + if padding_white: + aug_multip += len(white_padds)*len(padd_colors) + if pepper_aug: + aug_multip += len(pepper_indexes) + if pepper_bin_aug: + aug_multip += len(pepper_indexes) return aug_multip
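
# Reviewer note, not part of the patch: aug_multip appears to be the number of
# samples data_gen_ocr emits per source line image, so callers can size an epoch
# from it. A hypothetical consumer (names and call sites assumed, not shown in
# this diff):
#
#     aug_multip = return_multiplier_based_on_augmnentations(augmentation, ...)
#     steps_per_epoch = (len(ls_files_images) * aug_multip) // n_batch
#     model.fit(data_gen_ocr(...), steps_per_epoch=steps_per_epoch, epochs=n_epochs)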