From a9cdd56e9a2a30f89020487fe2567df9d5426fa0 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 May 2025 18:34:58 +0200 Subject: [PATCH] enhance ocr for vertical textlines --- src/eynollah/eynollah.py | 79 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 9f2ca50..5a73ef3 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -5133,7 +5133,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_125_225"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" + self.model_ocr_dir = dir_models + "/model_step_425000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn" model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5585,6 +5585,7 @@ class Eynollah_ocr: region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) cropped_lines = [] + cropped_lines_ver_index = [] cropped_lines_region_indexer = [] cropped_lines_meging_indexing = [] @@ -5644,6 +5645,11 @@ class Eynollah_ocr: if w_scaled < 1.5*image_width: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) + if angle_degrees > 15: + cropped_lines_ver_index.append(1) + else: + cropped_lines_ver_index.append(0) + cropped_lines_meging_indexing.append(0) if self.prediction_with_both_of_rgb_and_bin: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) @@ -5657,11 +5663,22 @@ class Eynollah_ocr: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(1) + + if angle_degrees > 15: + cropped_lines_ver_index.append(1) + else: + cropped_lines_ver_index.append(0) + img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(-1) + if angle_degrees > 15: + cropped_lines_ver_index.append(1) + else: + cropped_lines_ver_index.append(0) + if self.prediction_with_both_of_rgb_and_bin: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width) cropped_lines_bin.append(img_fin) @@ -5673,6 +5690,11 @@ class Eynollah_ocr: cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) + if angle_degrees > 15: + cropped_lines_ver_index.append(1) + else: + cropped_lines_ver_index.append(0) + if self.prediction_with_both_of_rgb_and_bin: img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width) cropped_lines_bin.append(img_fin) @@ -5722,6 +5744,19 @@ class Eynollah_ocr: imgs = cropped_lines[n_start:] imgs = np.array(imgs) imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3) + + ver_imgs = np.array( cropped_lines_ver_index[n_start:] ) + indices_ver = np.where(ver_imgs == 1)[0] + + #print(indices_ver, 'indices_ver') + if len(indices_ver)>0: + imgs_ver_flipped = imgs[indices_ver, : ,: ,:] + imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:] + #print(imgs_ver_flipped, 'imgs_ver_flipped') + + else: + imgs_ver_flipped = None + if self.prediction_with_both_of_rgb_and_bin: imgs_bin = cropped_lines_bin[n_start:] imgs_bin = np.array(imgs_bin) @@ -5732,12 +5767,54 @@ class Eynollah_ocr: imgs = cropped_lines[n_start:n_end] imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3) + ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] ) + indices_ver = np.where(ver_imgs == 1)[0] + #print(indices_ver, 'indices_ver') + + if len(indices_ver)>0: + imgs_ver_flipped = imgs[indices_ver, : ,: ,:] + imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:] + #print(imgs_ver_flipped, 'imgs_ver_flipped') + else: + imgs_ver_flipped = None + + if self.prediction_with_both_of_rgb_and_bin: imgs_bin = cropped_lines_bin[n_start:n_end] imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3) preds = self.prediction_model.predict(imgs, verbose=0) + + if len(indices_ver)>0: + #cv2.imwrite('flipped.png', (imgs_ver_flipped[0, :,:,:]*255).astype('uint8')) + #cv2.imwrite('original.png', (imgs[0, :,:,:]*255).astype('uint8')) + #sys.exit() + #print(imgs_ver_flipped.shape, 'imgs_ver_flipped.shape') + preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0) + preds_max_fliped = np.max(preds_flipped, axis=2 ) + preds_max_args_flipped = np.argmax(preds_flipped, axis=2 ) + pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256 + masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1) + masked_means_flipped[np.isnan(masked_means_flipped)] = 0 + #print(masked_means_flipped, 'masked_means_flipped') + + preds_max = np.max(preds, axis=2 ) + preds_max_args = np.argmax(preds, axis=2 ) + pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256 + + masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1) + masked_means[np.isnan(masked_means)] = 0 + + masked_means_ver = masked_means[indices_ver] + #print(masked_means_ver, 'pred_max_not_unk') + + indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0] + + #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher') + if len(indices_where_flipped_conf_value_is_higher)>0: + indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher] + preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :] if self.prediction_with_both_of_rgb_and_bin: preds_bin = self.prediction_model.predict(imgs_bin, verbose=0) preds = (preds + preds_bin) / 2.