Enhance OCR for vertical text lines

This commit is contained in:
vahidrezanezhad 2025-05-14 18:34:58 +02:00
parent 1ccd3fb7cf
commit a9cdd56e9a

View file

@ -5133,7 +5133,7 @@ class Eynollah_ocr:
self.b_s = int(batch_size) self.b_s = int(batch_size)
else: else:
self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_125_225"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" self.model_ocr_dir = dir_models + "/model_step_425000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
model_ocr = load_model(self.model_ocr_dir , compile=False) model_ocr = load_model(self.model_ocr_dir , compile=False)
self.prediction_model = tf.keras.models.Model( self.prediction_model = tf.keras.models.Model(
@ -5585,6 +5585,7 @@ class Eynollah_ocr:
region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')])
cropped_lines = [] cropped_lines = []
cropped_lines_ver_index = []
cropped_lines_region_indexer = [] cropped_lines_region_indexer = []
cropped_lines_meging_indexing = [] cropped_lines_meging_indexing = []
@ -5644,6 +5645,11 @@ class Eynollah_ocr:
if w_scaled < 1.5*image_width: if w_scaled < 1.5*image_width:
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
if angle_degrees > 15:
cropped_lines_ver_index.append(1)
else:
cropped_lines_ver_index.append(0)
cropped_lines_meging_indexing.append(0) cropped_lines_meging_indexing.append(0)
if self.prediction_with_both_of_rgb_and_bin: if self.prediction_with_both_of_rgb_and_bin:
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
@ -5657,11 +5663,22 @@ class Eynollah_ocr:
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(1) cropped_lines_meging_indexing.append(1)
if angle_degrees > 15:
cropped_lines_ver_index.append(1)
else:
cropped_lines_ver_index.append(0)
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(-1) cropped_lines_meging_indexing.append(-1)
if angle_degrees > 15:
cropped_lines_ver_index.append(1)
else:
cropped_lines_ver_index.append(0)
if self.prediction_with_both_of_rgb_and_bin: if self.prediction_with_both_of_rgb_and_bin:
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width)
cropped_lines_bin.append(img_fin) cropped_lines_bin.append(img_fin)
@ -5673,6 +5690,11 @@ class Eynollah_ocr:
cropped_lines.append(img_fin) cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(0) cropped_lines_meging_indexing.append(0)
if angle_degrees > 15:
cropped_lines_ver_index.append(1)
else:
cropped_lines_ver_index.append(0)
if self.prediction_with_both_of_rgb_and_bin: if self.prediction_with_both_of_rgb_and_bin:
img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
cropped_lines_bin.append(img_fin) cropped_lines_bin.append(img_fin)
@ -5722,6 +5744,19 @@ class Eynollah_ocr:
imgs = cropped_lines[n_start:] imgs = cropped_lines[n_start:]
imgs = np.array(imgs) imgs = np.array(imgs)
imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
ver_imgs = np.array( cropped_lines_ver_index[n_start:] )
indices_ver = np.where(ver_imgs == 1)[0]
#print(indices_ver, 'indices_ver')
if len(indices_ver)>0:
imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
#print(imgs_ver_flipped, 'imgs_ver_flipped')
else:
imgs_ver_flipped = None
if self.prediction_with_both_of_rgb_and_bin: if self.prediction_with_both_of_rgb_and_bin:
imgs_bin = cropped_lines_bin[n_start:] imgs_bin = cropped_lines_bin[n_start:]
imgs_bin = np.array(imgs_bin) imgs_bin = np.array(imgs_bin)
@ -5732,12 +5767,54 @@ class Eynollah_ocr:
imgs = cropped_lines[n_start:n_end] imgs = cropped_lines[n_start:n_end]
imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3) imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)
ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] )
indices_ver = np.where(ver_imgs == 1)[0]
#print(indices_ver, 'indices_ver')
if len(indices_ver)>0:
imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
#print(imgs_ver_flipped, 'imgs_ver_flipped')
else:
imgs_ver_flipped = None
if self.prediction_with_both_of_rgb_and_bin: if self.prediction_with_both_of_rgb_and_bin:
imgs_bin = cropped_lines_bin[n_start:n_end] imgs_bin = cropped_lines_bin[n_start:n_end]
imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3) imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
preds = self.prediction_model.predict(imgs, verbose=0) preds = self.prediction_model.predict(imgs, verbose=0)
if len(indices_ver)>0:
#cv2.imwrite('flipped.png', (imgs_ver_flipped[0, :,:,:]*255).astype('uint8'))
#cv2.imwrite('original.png', (imgs[0, :,:,:]*255).astype('uint8'))
#sys.exit()
#print(imgs_ver_flipped.shape, 'imgs_ver_flipped.shape')
preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0)
preds_max_fliped = np.max(preds_flipped, axis=2 )
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256
masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
#print(masked_means_flipped, 'masked_means_flipped')
preds_max = np.max(preds, axis=2 )
preds_max_args = np.argmax(preds, axis=2 )
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256
masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1)
masked_means[np.isnan(masked_means)] = 0
masked_means_ver = masked_means[indices_ver]
#print(masked_means_ver, 'pred_max_not_unk')
indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0]
#print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
if len(indices_where_flipped_conf_value_is_higher)>0:
indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
if self.prediction_with_both_of_rgb_and_bin: if self.prediction_with_both_of_rgb_and_bin:
preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
preds = (preds + preds_bin) / 2. preds = (preds + preds_bin) / 2.