From a9cdd56e9a2a30f89020487fe2567df9d5426fa0 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad <vahid631983@gmail.com>
Date: Wed, 14 May 2025 18:34:58 +0200
Subject: [PATCH] enhance ocr for vertical textlines

---
 src/eynollah/eynollah.py | 79 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 9f2ca50..5a73ef3 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -5133,7 +5133,7 @@ class Eynollah_ocr:
                     self.b_s = int(batch_size)
 
             else:
-                self.model_ocr_dir = dir_models + "/model_ens_ocrcnn_125_225"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
+                self.model_ocr_dir = dir_models + "/model_step_425000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
                 model_ocr = load_model(self.model_ocr_dir , compile=False)
                 
                 self.prediction_model = tf.keras.models.Model(
@@ -5585,6 +5585,7 @@ class Eynollah_ocr:
                 region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) 
                     
                 cropped_lines = []
+                cropped_lines_ver_index = []
                 cropped_lines_region_indexer = []
                 cropped_lines_meging_indexing = []
                 
@@ -5644,6 +5645,11 @@ class Eynollah_ocr:
                                         if w_scaled < 1.5*image_width:
                                             img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width)
                                             cropped_lines.append(img_fin)
+                                            if angle_degrees > 15:
+                                                cropped_lines_ver_index.append(1)
+                                            else:
+                                                cropped_lines_ver_index.append(0)
+                                                
                                             cropped_lines_meging_indexing.append(0)
                                             if self.prediction_with_both_of_rgb_and_bin:
                                                 img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
@@ -5657,11 +5663,22 @@ class Eynollah_ocr:
                                                 img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width)
                                                 cropped_lines.append(img_fin)
                                                 cropped_lines_meging_indexing.append(1)
+                                                
+                                                if angle_degrees > 15:
+                                                    cropped_lines_ver_index.append(1)
+                                                else:
+                                                    cropped_lines_ver_index.append(0)
+                                                
                                                 img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width)
                                                 
                                                 cropped_lines.append(img_fin)
                                                 cropped_lines_meging_indexing.append(-1)
                                                 
+                                                if angle_degrees > 15:
+                                                    cropped_lines_ver_index.append(1)
+                                                else:
+                                                    cropped_lines_ver_index.append(0)
+                                                
                                                 if self.prediction_with_both_of_rgb_and_bin:
                                                     img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(splited_images_bin[0], image_height, image_width)
                                                     cropped_lines_bin.append(img_fin)
@@ -5673,6 +5690,11 @@ class Eynollah_ocr:
                                                 cropped_lines.append(img_fin)
                                                 cropped_lines_meging_indexing.append(0)
                                                 
+                                                if angle_degrees > 15:
+                                                    cropped_lines_ver_index.append(1)
+                                                else:
+                                                    cropped_lines_ver_index.append(0)
+                                                
                                                 if self.prediction_with_both_of_rgb_and_bin:
                                                     img_fin = self.preprocess_and_resize_image_for_ocrcnn_model(img_crop_bin, image_height, image_width)
                                                     cropped_lines_bin.append(img_fin)
@@ -5722,6 +5744,19 @@ class Eynollah_ocr:
                             imgs = cropped_lines[n_start:]
                             imgs = np.array(imgs)
                             imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
+                            
+                            ver_imgs = np.array( cropped_lines_ver_index[n_start:] )
+                            indices_ver = np.where(ver_imgs == 1)[0]
+                            
+                            #print(indices_ver, 'indices_ver')
+                            if len(indices_ver)>0:
+                                imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
+                                imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
+                                #print(imgs_ver_flipped, 'imgs_ver_flipped')
+                                
+                            else:
+                                imgs_ver_flipped = None
+                            
                             if self.prediction_with_both_of_rgb_and_bin:
                                 imgs_bin = cropped_lines_bin[n_start:]
                                 imgs_bin = np.array(imgs_bin)
@@ -5732,12 +5767,54 @@ class Eynollah_ocr:
                             imgs = cropped_lines[n_start:n_end]
                             imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)
                             
+                            ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] )
+                            indices_ver = np.where(ver_imgs == 1)[0]
+                            #print(indices_ver, 'indices_ver')
+                            
+                            if len(indices_ver)>0:
+                                imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
+                                imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
+                                #print(imgs_ver_flipped, 'imgs_ver_flipped')
+                            else:
+                                imgs_ver_flipped = None
+
+                            
                             if self.prediction_with_both_of_rgb_and_bin:
                                 imgs_bin = cropped_lines_bin[n_start:n_end]
                                 imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
                             
 
                         preds = self.prediction_model.predict(imgs, verbose=0)
+                        
+                        if len(indices_ver)>0:
+                            #cv2.imwrite('flipped.png', (imgs_ver_flipped[0, :,:,:]*255).astype('uint8'))
+                            #cv2.imwrite('original.png', (imgs[0, :,:,:]*255).astype('uint8'))
+                            #sys.exit()
+                            #print(imgs_ver_flipped.shape, 'imgs_ver_flipped.shape')
+                            preds_flipped = self.prediction_model.predict(imgs_ver_flipped, verbose=0)
+                            preds_max_fliped = np.max(preds_flipped, axis=2 )
+                            preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
+                            pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=256
+                            masked_means_flipped = np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
+                            masked_means_flipped[np.isnan(masked_means_flipped)] = 0
+                            #print(masked_means_flipped, 'masked_means_flipped')
+                            
+                            preds_max = np.max(preds, axis=2 )
+                            preds_max_args = np.argmax(preds, axis=2 )
+                            pred_max_not_unk_mask_bool = preds_max_args[:,:]!=256
+                            
+                            masked_means = np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / np.sum(pred_max_not_unk_mask_bool, axis=1)
+                            masked_means[np.isnan(masked_means)] = 0
+                            
+                            masked_means_ver = masked_means[indices_ver]
+                            #print(masked_means_ver, 'pred_max_not_unk')
+                            
+                            indices_where_flipped_conf_value_is_higher = np.where(masked_means_flipped > masked_means_ver)[0]
+                            
+                            #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
+                            if len(indices_where_flipped_conf_value_is_higher)>0:
+                                indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
+                                preds[indices_to_be_replaced,:,:] = preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
                         if self.prediction_with_both_of_rgb_and_bin:
                             preds_bin = self.prediction_model.predict(imgs_bin, verbose=0)
                             preds = (preds + preds_bin) / 2.