From 1a0a1cb00b3820854f725a49bb35cd4c3cb42a6a Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Tue, 1 Apr 2025 21:15:41 +0200
Subject: [PATCH] remove session methods and redundant model loaders

---
 src/eynollah/eynollah.py | 198 ++++++++++-----------------------------
 1 file changed, 51 insertions(+), 147 deletions(-)

diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 42bbf31..35f7898 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -323,50 +323,52 @@ class Eynollah:
             self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"
         if self.ocr:
             self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
-            
         if self.tables:
             if self.light_version:
                 self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
             else:
                 self.model_table_dir = dir_models + "/eynollah-tables_20210319"
-        
-        self.models = {}
-        
-        if dir_in:
-            # as in start_new_session:
-            config = tf.compat.v1.ConfigProto()
-            config.gpu_options.allow_growth = True
-            session = tf.compat.v1.Session(config=config)
-            set_session(session)
-            
-            self.model_page = self.our_load_model(self.model_page_dir)
-            self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
-            self.model_bin = self.our_load_model(self.model_dir_of_binarization)
-            if self.extract_only_images:
-                self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction)
+
+        # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
+        # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
+        # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
+        # config = tf.compat.v1.ConfigProto()
+        # config.gpu_options.allow_growth = True
+        # #session = tf.InteractiveSession()
+        # session = tf.compat.v1.Session(config=config)
+        # set_session(session)
+        try:
+            for device in tf.config.list_physical_devices('GPU'):
+                tf.config.experimental.set_memory_growth(device, True)
+        except:
+            self.logger.warning("no GPU device available")
+
+        self.model_page = self.our_load_model(self.model_page_dir)
+        self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
+        self.model_bin = self.our_load_model(self.model_dir_of_binarization)
+        if self.extract_only_images:
+            self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction)
+        else:
+            self.model_textline = self.our_load_model(self.model_textline_dir)
+            if self.light_version:
+                self.model_region = self.our_load_model(self.model_region_dir_p_ens_light)
+                self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np)
             else:
-                self.model_textline = self.our_load_model(self.model_textline_dir)
-                if self.light_version:
-                    self.model_region = self.our_load_model(self.model_region_dir_p_ens_light)
-                    self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np)
-                else:
-                    self.model_region = self.our_load_model(self.model_region_dir_p_ens)
-                    self.model_region_p2 = self.our_load_model(self.model_region_dir_p2)
-                    self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement)
-                ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new)
-                self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np)
-                self.model_region_fl = self.our_load_model(self.model_region_dir_fully)
-                if self.reading_order_machine_based:
-                    self.model_reading_order = self.our_load_model(self.model_reading_order_dir)
-                if self.ocr:
-                    self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
-                    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-                    #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten")
-                    self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
-                if self.tables:
-                    self.model_table = self.our_load_model(self.model_table_dir)
-            
-            self.ls_imgs  = os.listdir(self.dir_in)
+                self.model_region = self.our_load_model(self.model_region_dir_p_ens)
+                self.model_region_p2 = self.our_load_model(self.model_region_dir_p2)
+                self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement)
+            ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new)
+            self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np)
+            self.model_region_fl = self.our_load_model(self.model_region_dir_fully)
+            if self.reading_order_machine_based:
+                self.model_reading_order = self.our_load_model(self.model_reading_order_dir)
+            if self.ocr:
+                self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
+                self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+                #("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten")
+                self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
+            if self.tables:
+                self.model_table = self.our_load_model(self.model_table_dir)
 
     def _cache_images(self, image_filename=None, image_pil=None):
         ret = {}
@@ -421,8 +423,6 @@ class Eynollah:
 
     def predict_enhancement(self, img):
         self.logger.debug("enter predict_enhancement")
-        if not self.dir_in:
-            self.model_enhancement, _ = self.start_new_session_and_model(self.model_dir_of_enhancement)
 
         img_height_model = self.model_enhancement.layers[-1].output_shape[1]
         img_width_model = self.model_enhancement.layers[-1].output_shape[2]
@@ -619,9 +619,6 @@ class Eynollah:
             img = self.imread()
 
         _, page_coord = self.early_page_for_num_of_column_classification(img)
-        
-        if not self.dir_in:
-            self.model_classifier, _ = self.start_new_session_and_model(self.model_dir_of_col_classifier)
 
         if self.input_binary:
             img_in = np.copy(img)
@@ -662,9 +659,6 @@ class Eynollah:
         self.logger.info("Detected %s DPI", dpi)
         if self.input_binary:
             img = self.imread()
-            if not self.dir_in:
-                self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
-
             prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5)
             prediction_bin = 255 * (prediction_bin[:,:,0]==0)
             prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
@@ -673,17 +667,14 @@ class Eynollah:
         else:
             img = self.imread()
             img_bin = None
-        
+
         width_early = img.shape[1]
         t1 = time.time()
         _, page_coord = self.early_page_for_num_of_column_classification(img_bin)
-        
+
         self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :]
         self.page_coord = page_coord
-        
-        if not self.dir_in:
-            self.model_classifier, _ = self.start_new_session_and_model(self.model_dir_of_col_classifier)
-        
+
         if self.num_col_upper and not self.num_col_lower:
             num_col = self.num_col_upper
             label_p_pred = [np.ones(6)]
@@ -823,43 +814,6 @@ class Eynollah:
         self.writer.height_org = self.height_org
         self.writer.width_org = self.width_org
 
-    def start_new_session_and_model_old(self, model_dir):
-        self.logger.debug("enter start_new_session_and_model (model_dir=%s)", model_dir)
-        config = tf.ConfigProto()
-        config.gpu_options.allow_growth = True
-
-        session = tf.InteractiveSession()
-        model = load_model(model_dir, compile=False)
-
-        return model, session
-
-    def start_new_session_and_model(self, model_dir):
-        self.logger.debug("enter start_new_session_and_model (model_dir=%s)", model_dir)
-        #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
-        #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
-        #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
-        physical_devices = tf.config.list_physical_devices('GPU')
-        try:
-            for device in physical_devices:
-                tf.config.experimental.set_memory_growth(device, True)
-        except:
-            self.logger.warning("no GPU device available")
-
-        if model_dir.endswith('.h5') and Path(model_dir[:-3]).exists():
-            # prefer SavedModel over HDF5 format if it exists
-            model_dir = model_dir[:-3]
-        if model_dir in self.models:
-            model = self.models[model_dir]
-        else:
-            try:
-                model = load_model(model_dir, compile=False)
-            except:
-                model = load_model(model_dir , compile=False, custom_objects={
-                    "PatchEncoder": PatchEncoder, "Patches": Patches})
-            self.models[model_dir] = model
-
-        return model, None
-
     def do_prediction(
             self, patches, img, model,
             n_batch_inference=1, marginal_of_patch_percent=0.1,
@@ -1397,9 +1351,6 @@ class Eynollah:
         self.logger.debug("enter extract_page")
         cont_page = []
         if not self.ignore_page_extraction:
-            if not self.dir_in:
-                self.model_page, _ = self.start_new_session_and_model(self.model_page_dir)
-
             img = cv2.GaussianBlur(self.image, (5, 5), 0)
             img_page_prediction = self.do_prediction(False, img, self.model_page)
             imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
@@ -1447,8 +1398,6 @@ class Eynollah:
                 img = np.copy(img_bin).astype(np.uint8)
             else:
                 img = self.imread()
-            if not self.dir_in:
-                self.model_page, _ = self.start_new_session_and_model(self.model_page_dir)
             img = cv2.GaussianBlur(img, (5, 5), 0)
             img_page_prediction = self.do_prediction(False, img, self.model_page)
 
@@ -1476,11 +1425,6 @@ class Eynollah:
         self.logger.debug("enter extract_text_regions")
         img_height_h = img.shape[0]
         img_width_h = img.shape[1]
-        if not self.dir_in:
-            if patches:
-                self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully)
-            else:
-                self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np)
         model_region = self.model_region_fl if patches else self.model_region_fl_np
 
         if self.light_version:
@@ -1512,11 +1456,6 @@ class Eynollah:
         self.logger.debug("enter extract_text_regions")
         img_height_h = img.shape[0]
         img_width_h = img.shape[1]
-        if not self.dir_in:
-            if patches:
-                self.model_region_fl, _ = self.start_new_session_and_model(self.model_region_dir_fully)
-            else:
-                self.model_region_fl_np, _ = self.start_new_session_and_model(self.model_region_dir_fully_np)
         model_region = self.model_region_fl if patches else self.model_region_fl_np
 
         if not patches:
@@ -1647,8 +1586,6 @@ class Eynollah:
 
     def textline_contours(self, img, use_patches, scaler_h, scaler_w, num_col_classifier=None):
         self.logger.debug('enter textline_contours')
-        if not self.dir_in:
-            self.model_textline, _ = self.start_new_session_and_model(self.model_textline_dir)
 
         #img = img.astype(np.uint8)
         img_org = np.copy(img)
@@ -1750,9 +1687,6 @@ class Eynollah:
         img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
         img_resized = resize_image(img,img_h_new, img_w_new )
 
-        if not self.dir_in:
-            self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction)
-
         prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region)
 
         prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
@@ -1841,7 +1775,6 @@ class Eynollah:
         img_height_h = img_org.shape[0]
         img_width_h = img_org.shape[1]
 
-        #model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens)
         #print(num_col_classifier,'num_col_classifier')
         
         if num_col_classifier == 1:
@@ -1864,8 +1797,6 @@ class Eynollah:
         #if self.input_binary:
             #img_bin = np.copy(img_resized)
         ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
-            ###if not self.dir_in:
-                ###self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
             ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)
                 
             ####print("inside bin ", time.time()-t_bin)
@@ -1881,8 +1812,6 @@ class Eynollah:
         ###else:
             ###img_bin = np.copy(img_resized)
         if self.ocr and not self.input_binary:
-            if not self.dir_in:
-                self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
             prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)
             prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
             prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
@@ -1905,12 +1834,7 @@ class Eynollah:
         #plt.show()
         if not skip_layout_and_reading_order:
             #print("inside 2 ", time.time()-t_in)
-            if not self.dir_in:
-                self.model_region_1_2, _ = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np)
-                ##self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light)
-
             if num_col_classifier == 1 or num_col_classifier == 2:
-                model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np)
                 if self.image_org.shape[0]/self.image_org.shape[1] > 2.5:
                     self.logger.debug("resized to %dx%d for %d cols",
                                       img_resized.shape[1], img_resized.shape[0], num_col_classifier)
@@ -2008,9 +1932,6 @@ class Eynollah:
         img_org = np.copy(img)
         img_height_h = img_org.shape[0]
         img_width_h = img_org.shape[1]
-        
-        if not self.dir_in:
-            self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens)
 
         ratio_y=1.3
         ratio_x=1
@@ -2037,11 +1958,8 @@ class Eynollah:
             prediction_regions_org=prediction_regions_org[:,:,0]
             prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0
 
-            if not self.dir_in:
-                self.model_region_p2, _ = self.start_new_session_and_model(self.model_region_dir_p2)
-
             img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]))
-            
+
             prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2)
             prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h )
 
@@ -2066,15 +1984,11 @@ class Eynollah:
                 if self.input_binary:
                     prediction_bin = np.copy(img_org)
                 else:
-                    if not self.dir_in:
-                        self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
                     prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5)
                     prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
                     prediction_bin = 255 * (prediction_bin[:,:,0]==0)
                     prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
-                
-                if not self.dir_in:
-                    self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens)
+
                 ratio_y=1
                 ratio_x=1
 
@@ -2107,17 +2021,10 @@ class Eynollah:
         except:
             if self.input_binary:
                 prediction_bin = np.copy(img_org)
-                
-                if not self.dir_in:
-                    self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
                 prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5)
                 prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
                 prediction_bin = 255 * (prediction_bin[:,:,0]==0)
                 prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
-            
-                if not self.dir_in:
-                    self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens)
-                    
             else:
                 prediction_bin = np.copy(img_org)
             ratio_y=1
@@ -2747,10 +2654,6 @@ class Eynollah:
         img_org = np.copy(img)
         img_height_h = img_org.shape[0]
         img_width_h = img_org.shape[1]
-        
-        if not self.dir_in:
-            self.model_table, _ = self.start_new_session_and_model(self.model_table_dir)
-        
         patches = False
         if self.light_version:
             prediction_table = self.do_prediction_new_concept(patches, img, self.model_table)
@@ -3386,8 +3289,12 @@ class Eynollah:
         return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d,
                 regions_without_separators_d, regions_fully, regions_without_separators,
                 polygons_of_marginals, contours_tables)
-    
-    def our_load_model(self, model_file):
+
+    @staticmethod
+    def our_load_model(model_file):
+        if model_file.endswith('.h5') and Path(model_file[:-3]).exists():
+            # prefer SavedModel over HDF5 format if it exists
+            model_file = model_file[:-3]
         try:
             model = load_model(model_file, compile=False)
         except:
@@ -3438,9 +3345,6 @@ class Eynollah:
         img_header_and_sep = resize_image(img_header_and_sep, height1, width1)
         img_poly = resize_image(img_poly, height3, width3)
 
-        if not self.dir_in:
-            self.model_reading_order, _ = self.start_new_session_and_model(self.model_reading_order_dir)
-
         inference_bs = 3
         input_1 = np.zeros((inference_bs, height1, width1, 3))
         ordered = [list(range(len(co_text_all)))]