The CNN-RNN OCR model can be trained now

parent 84a72a128b
commit 4fc3ff33cb

3 changed files with 138 additions and 57 deletions
@@ -138,8 +138,10 @@ def config_params():
     scaling_brightness = False # If true, a combination of scaling and brightening will be applied to the image.
     scaling_flip = False # If true, a combination of scaling and flipping will be applied to the image.
     thetha = None # Rotate image by these angles for augmentation.
+    thetha_padd = None # List of angles used for rotation alongside padding
     shuffle_indexes = None # List of shuffling indexes like [[0,2,1], [1,2,0], [1,0,2]]
     pepper_indexes = None # List of pepper noise indexes like [0.01, 0.005]
+    white_padds = None # List of padding size in the case of white padding
     skewing_amplitudes = None # List of skewing augmentation amplitudes like [5, 8]
     blur_k = None # Blur image for augmentation.
     scales = None # Scale patches for augmentation.
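For reference, the two new config_params entries take list values; the sample cnn-rnn-ocr config changed later in this commit sets them as follows (shown here only as an illustration of the expected types):

# Illustrative values, copied from the sample OCR config in this commit.
# thetha_padd: small rotation angles used together with colored padding (presumably degrees).
# white_padds: relative padding sizes for the white/colored padding augmentation.
thetha_padd = [-0.6, -1, -1.4, -1.8, 0.6, 1, 1.4, 1.8]
white_padds = [0.1, 0.3, 0.5, 0.7, 0.9]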
@@ -181,14 +183,14 @@ def run(_config, n_classes, n_epochs, input_height,
         brightening, binarization, adding_rgb_background, adding_rgb_foreground, add_red_textlines, blur_k, scales, degrade_scales,shuffle_indexes,
         brightness, dir_train, data_is_provided, scaling_bluring,
         scaling_brightness, scaling_binarization, rotation, rotation_not_90,
-        thetha, scaling_flip, continue_training, transformer_projection_dim,
+        thetha, thetha_padd, scaling_flip, continue_training, transformer_projection_dim,
         transformer_mlp_head_units, transformer_layers, transformer_num_heads, transformer_cnn_first,
         transformer_patchsize_x, transformer_patchsize_y,
         transformer_num_patches_xy, backbone_type, save_interval, flip_index, dir_eval, dir_output,
         pretraining, learning_rate, task, f1_threshold_classification, classification_classes_name, dir_img_bin, number_of_backgrounds_per_image,dir_rgb_backgrounds,
         dir_rgb_foregrounds, characters_txt_file, color_padding_rotation, bin_deg, image_inversion, white_noise_strap, textline_skewing, textline_skewing_bin,
         textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin,
-        textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, padd_colors, pepper_indexes, skewing_amplitudes, max_len):
+        textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, padd_colors, pepper_indexes, white_padds, skewing_amplitudes, max_len):

     if dir_rgb_backgrounds:
         list_all_possible_background_images = os.listdir(dir_rgb_backgrounds)

@@ -433,7 +435,7 @@ def run(_config, n_classes, n_epochs, input_height,

         aug_multip = return_multiplier_based_on_augmnentations(augmentation, color_padding_rotation, rotation_not_90, blur_aug, degrading, bin_deg,
             brightening, padding_white, adding_rgb_foreground, adding_rgb_background, binarization,
-            image_inversion, channels_shuffling, add_red_textlines, white_noise_strap, textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, brightness, padd_colors, shuffle_indexes, pepper_indexes, skewing_amplitudes)
+            image_inversion, channels_shuffling, add_red_textlines, white_noise_strap, textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, thetha_padd, brightness, padd_colors, shuffle_indexes, pepper_indexes, skewing_amplitudes, blur_k, white_padds)

         len_dataset = aug_multip*len(ls_files_images)
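To make the dataset scaling concrete: the multiplier returned by return_multiplier_based_on_augmnentations starts at 1 and grows by the length of each enabled augmentation's parameter list (see the hunks of that function further down). A partial estimate with the sample config values from this commit, counting only the branches visible in this diff:

# Partial illustration; only the contributions visible in this diff are counted.
blur_k = ["blur", "gauss", "median"]
brightness = [1.3, 1.5, 1.7, 2]
shuffle_indexes = [[0, 2, 1], [1, 2, 0], [1, 0, 2], [2, 1, 0]]

aug_multip = 1                      # the original, un-augmented line image
aug_multip += len(blur_k)           # blur_aug
aug_multip += len(brightness)       # brightening
aug_multip += len(shuffle_indexes)  # channels_shuffling
print(aug_multip)                   # 12 from these three branches alone
# len_dataset = aug_multip * len(ls_files_images), plus the other enabled augmentations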
@@ -442,10 +444,41 @@ def run(_config, n_classes, n_epochs, input_height,
            adding_rgb_foreground, adding_rgb_background, binarization, image_inversion, channels_shuffling, add_red_textlines, white_noise_strap,
            textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth,
            textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin,
-           pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, brightness, padd_colors,
-           shuffle_indexes, pepper_indexes, skewing_amplitudes, dir_img_bin)
+           pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, thetha_padd, brightness, padd_colors,
+           shuffle_indexes, pepper_indexes, skewing_amplitudes, blur_k, char_to_num, list_all_possible_background_images, list_all_possible_foreground_rgbs,
+           dir_rgb_backgrounds, dir_rgb_foregrounds, white_padds, dir_img_bin)

-        print(len_dataset, 'len_dataset')
+        initial_learning_rate = 1e-4
+        decay_steps = int (n_epochs * ( len_dataset / n_batch ))
+        alpha = 0.01
+        lr_schedule = 1e-4#tf.keras.optimizers.schedules.CosineDecay(initial_learning_rate, decay_steps, alpha)
+
+        opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)#1e-4)#(lr_schedule)
+        model.compile(optimizer=opt)
+
+        if save_interval:
+            save_weights_callback = SaveWeightsAfterSteps(save_interval, dir_output, _config)
+
+        indexer_start = 0
+        for i in range(n_epochs):
+            if save_interval:
+                model.fit(
+                    train_ds,
+                    steps_per_epoch=len_dataset / n_batch,
+                    epochs=1,
+                    callbacks=[save_weights_callback]
+                )
+            else:
+                model.fit(
+                    train_ds,
+                    steps_per_epoch=len_dataset / n_batch,
+                    epochs=1
+                )
+
+            if i >=0:
+                model.save( os.path.join(dir_output,'model_'+str(i+indexer_start) ))
+                with open(os.path.join(os.path.join(dir_output,'model_'+str(i)),"config.json"), "w") as fp:
+                    json.dump(_config, fp) # encode dict into JSON

    elif task=='classification':
        configuration()
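Two notes on the new training loop. SaveWeightsAfterSteps is referenced but not defined in this diff; a minimal Keras callback with the behaviour its name and arguments suggest might look like the sketch below (the class body and the checkpoint naming are assumptions, not the repository's actual implementation). The tail of the sketch shows how the commented-out CosineDecay schedule could be re-enabled by handing the schedule object to the optimizer instead of the fixed learning_rate.

import json
import os

import tensorflow as tf


class SaveWeightsAfterSteps(tf.keras.callbacks.Callback):
    """Hypothetical sketch: save the model every `save_interval` training steps."""

    def __init__(self, save_interval, dir_output, config):
        super().__init__()
        self.save_interval = save_interval
        self.dir_output = dir_output
        self.config = config
        self.step = 0

    def on_train_batch_end(self, batch, logs=None):
        self.step += 1
        if self.step % self.save_interval == 0:
            out_dir = os.path.join(self.dir_output, "model_step_" + str(self.step))
            self.model.save(out_dir)
            with open(os.path.join(out_dir, "config.json"), "w") as fp:
                json.dump(self.config, fp)


# Re-enabling the cosine schedule would replace the fixed learning rate, e.g.:
# lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
#     initial_learning_rate=1e-4,
#     decay_steps=int(n_epochs * (len_dataset / n_batch)),
#     alpha=0.01)
# opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)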
@@ -9,11 +9,35 @@ from scipy.ndimage.interpolation import map_coordinates
 from scipy.ndimage.filters import gaussian_filter
 from tqdm import tqdm
 import imutils
+import tensorflow as tf
 from tensorflow.keras.utils import to_categorical
 from PIL import Image, ImageFile, ImageEnhance

 ImageFile.LOAD_TRUNCATED_IMAGES = True

+def vectorize_label(label, char_to_num, padding_token, max_len):
+    label = char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
+    length = tf.shape(label)[0]
+    pad_amount = max_len - length
+    label = tf.pad(label, paddings=[[0, pad_amount]], constant_values=padding_token)
+    return label
+
+def scale_padd_image_for_ocr(img, height, width):
+    ratio = height /float(img.shape[0])
+
+    w_ratio = int(ratio * img.shape[1])
+
+    if w_ratio<=width:
+        width_new = w_ratio
+    else:
+        width_new = width
+
+    img_res= resize_image (img, height, width_new)
+    img_fin = np.ones((height, width, 3))*255
+
+    img_fin[:,:width_new,:] = img_res[:,:,:]
+    return img_fin
+
 def add_salt_and_pepper_noise(img, salt_prob, pepper_prob):
     """
     Add salt-and-pepper noise to an image.
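For orientation, the two new helpers are used roughly as follows, assuming they are in scope as defined above. The char_to_num lookup is not built in this hunk; the sketch assumes it is a Keras StringLookup over the model's character set, as is common in Keras CTC/OCR pipelines, and the vocabulary, padding_token and max_len values below are purely illustrative.

import numpy as np
import tensorflow as tf

# Assumed setup (illustrative): a character vocabulary and a StringLookup layer.
vocab = ["a", "b", "c", " "]
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, mask_token=None)
padding_token = 299   # assumption: an id outside the vocabulary range
max_len = 280

# Encode a transcription into a fixed-length integer label vector.
label = vectorize_label("abc ab", char_to_num, padding_token, max_len)
print(label.shape)        # (280,): character ids followed by padding_token

# Resize a line image to the model height and pad it to the full width with white.
line_img = np.random.randint(0, 255, (48, 300, 3)).astype(np.uint8)
model_input = scale_padd_image_for_ocr(line_img, 32, 512)
print(model_input.shape)  # (32, 512, 3); the right-hand part is filled with 255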
@@ -1269,8 +1293,9 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
         adding_rgb_foreground, adding_rgb_background, binarization, image_inversion, channels_shuffling, add_red_textlines, white_noise_strap,
         textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth,
         textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin,
-        pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, brightness, padd_colors,
-        shuffle_indexes, pepper_indexes, skewing_amplitudes, dir_img_bin=None):
+        pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, thetha_padd, brightness, padd_colors,
+        shuffle_indexes, pepper_indexes, skewing_amplitudes, blur_k, char_to_num, list_all_possible_background_images,
+        list_all_possible_foreground_rgbs, dir_rgb_backgrounds, dir_rgb_foregrounds, white_padds, dir_img_bin=None):

     random.shuffle(ls_files_images)
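The generator yields dict batches of the form {"image": ..., "label": ...} (see the yield further down), while the training loop added above consumes a train_ds object. The expression that actually produces train_ds is not visible in this diff; one plausible wiring, shown purely as an assumption, wraps the Python generator in tf.data:

import tensorflow as tf

def make_train_ds(gen_callable, n_batch, input_height, input_width, max_len):
    # Assumed glue code (not part of this commit): expose data_gen_ocr to Keras via tf.data.
    output_signature = {
        "image": tf.TensorSpec(shape=(n_batch, input_height, input_width, 3), dtype=tf.float32),
        "label": tf.TensorSpec(shape=(n_batch, max_len), dtype=tf.int16),
    }
    return tf.data.Dataset.from_generator(gen_callable, output_signature=output_signature)

# e.g. train_ds = make_train_ds(lambda: data_gen_ocr(<arguments as passed in run()>), 4, 32, 512, 280)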
@@ -1294,7 +1319,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
            img_out = scale_padd_image_for_ocr(img, input_height, input_width)

            ret_x[batchcount, :,:,:] = img_out[:,:,:]
-           ret_y[batchcount, :] = vectorize_label(txt_inp)
+           ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

            batchcount+=1

@@ -1306,14 +1331,14 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                batchcount = 0

        if color_padding_rotation:
-           for index, thetha in enumerate(thetha_padd):
+           for index, thetha_ind in enumerate(thetha_padd):
                for padd_col in padd_colors:
-                   img_out = rotation_not_90_func(do_padding(img, 1.2, padd_col), thetha)
+                   img_out = rotation_not_90_func_single_image(do_padding_for_ocr(img, 1.2, padd_col), thetha_ind)

                    img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                    ret_x[batchcount, :,:,:] = img_out[:,:,:]
-                   ret_y[batchcount, :] = vectorize_label(txt_inp)
+                   ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                    batchcount+=1
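rotation_not_90_func_single_image and do_padding_for_ocr are referenced here but not defined in this diff. As a rough idea of the rotation step only, a small-angle rotation that keeps the whole (padded) line visible could look like the sketch below, built on the imutils import already present in this file; this is an assumption about the behaviour, not the repository's implementation.

import imutils

def rotate_line_image(img, thetha_ind):
    # Hypothetical stand-in: rotate a text-line image by a small angle (degrees)
    # while enlarging the canvas so no pixels are clipped.
    return imutils.rotate_bound(img, thetha_ind)

# e.g. with the new config values: rotate_line_image(img, -1.4) or rotate_line_image(img, 0.6)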
@@ -1325,12 +1350,12 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                batchcount = 0

        if rotation_not_90:
-           for index, thetha in enumerate(thetha):
-               img_out = rotation_not_90_func(img, thetha)
+           for index, thetha_ind in enumerate(thetha):
+               img_out = rotation_not_90_func_single_image(img, thetha_ind)
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1342,12 +1367,12 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                batchcount = 0

        if blur_aug:
-           for index, blur_type in enumerate(blurs):
+           for index, blur_type in enumerate(blur_k):
                img_out = bluring(img, blur_type)
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1367,7 +1392,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1388,7 +1413,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1409,7 +1434,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1423,11 +1448,11 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
        if padding_white:
            for index, padding_size in enumerate(white_padds):
                for padd_col in padd_colors:
-                   img_out = do_padding(img, padding_size, padd_col)
+                   img_out = do_padding_for_ocr(img, padding_size, padd_col)
                    img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                    ret_x[batchcount, :,:,:] = img_out[:,:,:]
-                   ret_y[batchcount, :] = vectorize_label(txt_inp)
+                   ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                    batchcount+=1
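do_padding_for_ocr is likewise only referenced in this diff. Judging from its call sites (a relative padding size from white_padds, or 1.2 in the rotation branch, plus a padd_col of "white" or "black"), one plausible sketch adds a border of the chosen colour around the line image; treat the exact geometry as an assumption.

import numpy as np

def pad_line_image(img, padding_size, padd_col):
    # Hypothetical stand-in for do_padding_for_ocr: add a border whose thickness is
    # padding_size times the line height, filled with white or black.
    pad = max(1, int(padding_size * img.shape[0]))
    value = 255 if padd_col == "white" else 0
    return np.pad(img, ((pad, pad), (pad, pad), (0, 0)), mode="constant", constant_values=value)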
@@ -1451,7 +1476,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_with_overlayed_background, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1472,7 +1497,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_with_overlayed_background, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1487,7 +1512,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_bin_corr, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1503,7 +1528,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :, :, :] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1511,7 +1536,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                ret_x = ret_x/255.
                yield {"image": ret_x, "label": ret_y}
                ret_x = np.zeros((n_batch, input_height, input_width, 3)).astype(np.float32)
-               ret_y = np.zeros((batch_size, max_len)).astype(np.int16)+padding_token
+               ret_y = np.zeros((n_batch, max_len)).astype(np.int16)+padding_token
                batchcount = 0
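After every yield the buffers are re-allocated, so each emitted batch has a fixed shape determined by the config. With the sample values from this commit (n_batch=4, input_height=32, input_width=512, max_len=280) a batch looks like this (an illustrative check, assuming gen is the generator returned by data_gen_ocr):

batch = next(gen)
assert batch["image"].shape == (4, 32, 512, 3)   # float32 line images, scaled to [0, 1] by ret_x/255.
assert batch["label"].shape == (4, 280)          # int16 character ids, padded with padding_token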
@@ -1521,7 +1546,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1539,7 +1564,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_red_context, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1556,7 +1581,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1576,7 +1601,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1596,7 +1621,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img_bin_corr)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1616,7 +1641,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1636,7 +1661,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img_bin_corr)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1656,7 +1681,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1676,7 +1701,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img_bin_corr)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1696,7 +1721,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1716,7 +1741,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img_bin_corr)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1
@@ -1736,7 +1761,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1756,7 +1781,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = np.copy(img_bin_corr)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1773,7 +1798,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1791,7 +1816,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img_out, input_height, input_width)

                ret_x[batchcount, :,:,:] = img_out[:,:,:]
-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1

@@ -1809,7 +1834,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir
                img_out = scale_padd_image_for_ocr(img, input_height, input_width)
                ret_x[batchcount, :,:,:] = img_out[:,:,:]

-               ret_y[batchcount, :] = vectorize_label(txt_inp)
+               ret_y[batchcount, :] = vectorize_label(txt_inp, char_to_num, padding_token, max_len)

                batchcount+=1
@@ -1823,7 +1848,7 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir

 def return_multiplier_based_on_augmnentations(augmentation, color_padding_rotation, rotation_not_90, blur_aug,
        degrading, bin_deg, brightening, padding_white,adding_rgb_foreground, adding_rgb_background, binarization, image_inversion, channels_shuffling, add_red_textlines, white_noise_strap,
-       textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, brightness, padd_colors, shuffle_indexes, pepper_indexes, skewing_amplitudes):
+       textline_skewing, textline_skewing_bin, textline_left_in_depth, textline_left_in_depth_bin, textline_right_in_depth, textline_right_in_depth_bin, textline_up_in_depth, textline_up_in_depth_bin, textline_down_in_depth, textline_down_in_depth_bin, pepper_bin_aug, pepper_aug, degrade_scales, number_of_backgrounds_per_image, thetha, thetha_padd, brightness, padd_colors, shuffle_indexes, pepper_indexes, skewing_amplitudes, blur_k, white_padds):
     aug_multip = 1

     if augmentation:

@@ -1870,7 +1895,7 @@ def return_multiplier_based_on_augmnentations(augmentation, color_padding_rotati
        if channels_shuffling:
            aug_multip = aug_multip + len(shuffle_indexes)
        if blur_aug:
-           aug_multip = aug_multip + len(blurs)
+           aug_multip = aug_multip + len(blur_k)
        if brightening:
            aug_multip = aug_multip + len(brightness)
        if padding_white:
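The branches visible here add len(shuffle_indexes), len(blur_k) and len(brightness) to the multiplier. The padding_white branch is cut off by the hunk, but since the generator iterates over every white_padds value for every padd_colors value, its contribution is presumably the product of the two list lengths (an inference from the loop structure, not a line shown in this diff):

# Inference from the nested loops in the padding_white branch of data_gen_ocr:
white_padds = [0.1, 0.3, 0.5, 0.7, 0.9]
padd_colors = ["white", "black"]
print(len(white_padds) * len(padd_colors))   # 10 extra samples per text line, if enabled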
@@ -3,30 +3,48 @@
     "task": "cnn-rnn-ocr",
     "n_classes" : 2,
     "max_len": 280,
-    "n_epochs" : 0,
+    "n_epochs" : 3,
     "input_height" : 32,
     "input_width" : 512,
     "weight_decay" : 1e-6,
-    "n_batch" : 1,
-    "learning_rate": 1e-4,
+    "n_batch" : 4,
+    "learning_rate": 1e-5,
+    "save_interval": 1500,
     "patches" : false,
     "pretraining" : true,
-    "augmentation" : false,
+    "augmentation" : true,
     "flip_aug" : false,
-    "blur_aug" : false,
+    "blur_aug" : true,
     "scaling" : false,
     "adding_rgb_background": true,
     "adding_rgb_foreground": true,
-    "add_red_textlines": false,
-    "channels_shuffling": false,
-    "degrading": false,
-    "brightening": false,
+    "add_red_textlines": true,
+    "white_noise_strap": true,
+    "textline_right_in_depth": true,
+    "textline_left_in_depth": true,
+    "textline_up_in_depth": true,
+    "textline_down_in_depth": true,
+    "textline_right_in_depth_bin": true,
+    "textline_left_in_depth_bin": true,
+    "textline_up_in_depth_bin": true,
+    "textline_down_in_depth_bin": true,
+    "bin_deg": true,
+    "textline_skewing": true,
+    "textline_skewing_bin": true,
+    "channels_shuffling": true,
+    "degrading": true,
+    "brightening": true,
     "binarization" : true,
+    "pepper_aug": true,
+    "pepper_bin_aug": true,
+    "image_inversion": true,
     "scaling_bluring" : false,
     "scaling_binarization" : false,
     "scaling_flip" : false,
     "rotation": false,
-    "rotation_not_90": false,
+    "color_padding_rotation": true,
+    "padding_white": true,
+    "rotation_not_90": true,
     "transformer_num_patches_xy": [56, 56],
     "transformer_patchsize_x": 4,
     "transformer_patchsize_y": 4,

@@ -36,12 +54,17 @@
     "transformer_num_heads": 1,
     "transformer_cnn_first": false,
     "blur_k" : ["blur","gauss","median"],
+    "padd_colors" : ["white", "black"],
     "scales" : [0.6, 0.7, 0.8, 0.9],
     "brightness" : [1.3, 1.5, 1.7, 2],
     "degrade_scales" : [0.2, 0.4],
+    "pepper_indexes": [0.01, 0.005],
+    "skewing_amplitudes" : [5, 8],
     "flip_index" : [0, 1, -1],
     "shuffle_indexes" : [ [0,2,1], [1,2,0], [1,0,2] , [2,1,0]],
-    "thetha" : [5, -5],
+    "thetha" : [0.1, 0.2, -0.1, -0.2],
+    "thetha_padd": [-0.6, -1, -1.4, -1.8, 0.6, 1, 1.4, 1.8],
+    "white_padds" : [0.1, 0.3, 0.5, 0.7, 0.9],
     "number_of_backgrounds_per_image": 2,
     "continue_training": false,
     "index_start" : 0,

@@ -54,7 +77,7 @@
     "dir_output": "/home/vahid/extracted_lines/1919_bin/output",
     "dir_rgb_backgrounds": "/home/vahid/Documents/1_2_test_eynollah/set_rgb_background",
     "dir_rgb_foregrounds": "/home/vahid/Documents/1_2_test_eynollah/out_set_rgb_foreground",
-    "dir_img_bin": "/home/vahid/Documents/test/sbb_pixelwise_segmentation/test_label/pageextractor_test/train_new/images_bin",
+    "dir_img_bin": "/home/vahid/extracted_lines/1919_bin/images_bin",
     "characters_txt_file":"/home/vahid/Downloads/models_eynollah/model_eynollah_ocr_cnnrnn_20250930/characters_org.txt"

 }