From ff7dc31a68d62eda306f13db5ab2307462f78b3a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 2 Mar 2026 13:08:11 +0100 Subject: [PATCH] do_prediction*: rename identifiers for artificial class thresholding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `do_prediction_new_concept` w/ patches: remove branches for `thresholding_for_artificial_class` (never used, wrong name) - `do_prediction_new_concept` w/ patches: rename kwarg `thresholding_for_some_classes` → `thresholding_for_artificial_class` - `do_prediction_new_concept`: introduce kwarg `artificial_class` (for baked constant 4) - `do_prediction`: introduce kwarg `artificial_class` (for baked constant 2) - `do_prediction/_new_concept`: rename kwargs `thresholding_for..._in_light_version` → `thresholding_for...` - `do_prediction`: rename kwarg `threshold_art_class_textline` → `threshold_art_class` - `do_prediction_new_concept`: rename kwarg `threshold_art_class_layout` → `threshold_art_class` --- src/eynollah/eynollah.py | 158 ++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 93 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 5cad8a0..b97aae2 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -393,16 +393,16 @@ class Eynollah: if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) - num_column_is_classified = False + img_is_resized = False #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: elif img_h_new >= 8000: img_new = np.copy(img) - num_column_is_classified = False + img_is_resized = False else: img_new = resize_image(img, img_h_new, img_w_new) - num_column_is_classified = True + img_is_resized = True - return img_new, num_column_is_classified + return img_new, img_is_resized def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): self.logger.debug("enter 
calculate_width_height_by_columns") @@ -414,16 +414,16 @@ class Eynollah: if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) - num_column_is_classified = False + img_is_resized = False #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: elif img_h_new >= 8000: img_new = np.copy(img) - num_column_is_classified = False + img_is_resized = False else: img_new = resize_image(img, img_h_new, img_w_new) - num_column_is_classified = True + img_is_resized = True - return img_new, num_column_is_classified + return img_new, img_is_resized def resize_image_with_column_classifier(self, is_image_enhanced, img_bin): self.logger.debug("enter resize_image_with_column_classifier") @@ -624,10 +624,12 @@ class Eynollah: def do_prediction( self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, - thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False, - thresholding_for_fl_light_version=False, - threshold_art_class_textline=0.1): + thresholding_for_some_classes=False, + thresholding_for_heading=False, + thresholding_for_artificial_class=False, + threshold_art_class=0.1, + artificial_class=2, + ): self.logger.debug("enter do_prediction (patches=%d)", patches) img_height_model = model.layers[-1].output_shape[1] @@ -639,27 +641,19 @@ class Eynollah: img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img[np.newaxis], verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] + label_p_pred = model.predict(img[np.newaxis], verbose=0)[0] + seg = np.argmax(label_p_pred, axis=2) - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[0,:,:,2] + if thresholding_for_artificial_class: + seg_art = label_p_pred[:, :, artificial_class] + seg_art = (seg_art >= threshold_art_class).astype(int) - seg_art[seg_art<0.2] = 0 - seg_art[seg_art>0] =1 + seg[skeletonize(seg_art)] = artificial_class - skeleton_art = 
skeletonize(seg_art) - skeleton_art = skeleton_art*1 + if thresholding_for_heading: + seg_header = label_p_pred[:, :, 2] - seg[skeleton_art==1]=2 - - if thresholding_for_fl_light_version: - seg_header = label_p_pred[0,:,:,2] - - seg_header[seg_header<0.2] = 0 - seg_header[seg_header>0] =1 - - seg[seg_header==1]=2 + seg[seg_header >= 0.2] = 2 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) @@ -722,7 +716,7 @@ class Eynollah: label_p_pred = model.predict(img_patch, verbose=0) seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: + if thresholding_for_some_classes: seg_not_base = label_p_pred[:,:,:,4] seg_not_base = (seg_not_base > 0.03).astype(int) @@ -736,17 +730,17 @@ class Eynollah: seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - seg_art = (seg_art >= threshold_art_class_textline).astype(int) + if thresholding_for_artificial_class: + seg_art = label_p_pred[:, :, :, artificial_class] + seg_art = (seg_art >= threshold_art_class).astype(int) - ##seg[seg_art==1]=2 + ##seg[seg_art==1]=artificial_class indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch] - if thresholding_for_artificial_class_in_light_version: + if thresholding_for_artificial_class: seg_in_art = seg_art[indexer_inside_batch] index_y_u_in = list_y_u[indexer_inside_batch] @@ -797,7 +791,7 @@ class Eynollah: inbox = np.index_exp[margin:-margin or None, margin:-margin or None] prediction_true[where][inbox] = seg_in[inbox + (np.newaxis,)] - if thresholding_for_artificial_class_in_light_version: + if thresholding_for_artificial_class: prediction_true[where][inbox + (1,)] = seg_in_art[inbox] indexer_inside_batch += 1 @@ -815,14 +809,14 @@ class Eynollah: prediction_true = prediction_true.astype(np.uint8) - if 
thresholding_for_artificial_class_in_light_version: + if thresholding_for_artificial_class: kernel_min = np.ones((3, 3), np.uint8) - prediction_true[:,:,0][prediction_true[:,:,0]==2] = 0 + prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0 skeleton_art = skeletonize(prediction_true[:,:,1]).astype(np.uint8) skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1) - prediction_true[:,:,0][skeleton_art==1]=2 + prediction_true[:,:,0][skeleton_art==1]=artificial_class #del model gc.collect() return prediction_true @@ -830,10 +824,10 @@ class Eynollah: def do_prediction_new_concept( self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, - thresholding_for_some_classes_in_light_version=False, - thresholding_for_artificial_class_in_light_version=False, - threshold_art_class_textline=0.1, - threshold_art_class_layout=0.1): + thresholding_for_artificial_class=False, + threshold_art_class=0.1, + artificial_class=4, + ): self.logger.debug("enter do_prediction_new_concept") img_height_model = model.layers[-1].output_shape[1] @@ -845,27 +839,28 @@ class Eynollah: img = img / 255.0 img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img[np.newaxis], verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] + label_p_pred = model.predict(img[np.newaxis], verbose=0)[0] + seg = np.argmax(label_p_pred, axis=2) seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8) - if thresholding_for_artificial_class_in_light_version: + if thresholding_for_artificial_class: kernel_min = np.ones((3, 3), np.uint8) - seg_art = label_p_pred[0,:,:,4] - seg_art = (seg_art >= threshold_art_class_layout).astype(int) + seg_art = label_p_pred[:, :, artificial_class] + seg_art = (seg_art >= threshold_art_class).astype(int) #seg[seg_art==1]=4 seg_art = resize_image(seg_art, img_h_page, img_w_page).astype(np.uint8) - 
prediction_true[:,:,0][prediction_true[:,:,0]==4] = 0 + prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0 skeleton_art = skeletonize(seg_art).astype(np.uint8) skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1) - prediction_true[:,:,0][skeleton_art==1] = 4 + prediction_true[:,:,0][skeleton_art==1] = artificial_class - return prediction_true , resize_image(label_p_pred[0, :, :, 1] , img_h_page, img_w_page) + seg_text = resize_image(label_p_pred[:, :, 1] , img_h_page, img_w_page) + return prediction_true, seg_text if img.shape[0] < img_height_model: img = resize_image(img, img_height_model, img.shape[1]) @@ -873,8 +868,6 @@ class Eynollah: img = resize_image(img, img.shape[0], img_width_model) self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) - thresholding = (thresholding_for_artificial_class_in_light_version or - thresholding_for_some_classes_in_light_version) margin = int(marginal_of_patch_percent * img_height_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin @@ -930,27 +923,15 @@ class Eynollah: label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) - if thresholding_for_some_classes_in_light_version: - seg_art = label_p_pred[:,:,:,4] - - seg_art = (seg_art >= threshold_art_class_layout).astype(int) - - seg_line = label_p_pred[:,:,:,3] - seg_line = (seg_line > 0.4).astype(int) - - ##seg[seg_art==1]=4 - #seg[(seg_line==1) & (seg==0)]=3 - if thresholding_for_artificial_class_in_light_version: - seg_art = label_p_pred[:,:,:,2] - - seg_art = (seg_art >= threshold_art_class_textline).astype(int) - ##seg[seg_art==1]=2 + if thresholding_for_artificial_class: + seg_art = label_p_pred[:, :, :, artificial_class] + seg_art = (seg_art >= threshold_art_class).astype(int) indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch] - if thresholding: + if thresholding_for_artificial_class: seg_in_art 
= seg_art[indexer_inside_batch] index_y_u_in = list_y_u[indexer_inside_batch] @@ -1006,7 +987,7 @@ class Eynollah: # artificial boundary class map should be extra array # rs: why does confidence_matrix only get text-label scores? # should be scores at final argmax - if thresholding: + if thresholding_for_artificial_class: prediction_true[where][inbox + (1,)] = seg_in_art[inbox] indexer_inside_batch += 1 @@ -1023,23 +1004,14 @@ class Eynollah: prediction_true = prediction_true.astype(np.uint8) - if thresholding_for_artificial_class_in_light_version: + if thresholding_for_artificial_class: kernel_min = np.ones((3, 3), np.uint8) - prediction_true[:,:,0][prediction_true[:,:,0]==2] = 0 + prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0 skeleton_art = skeletonize(prediction_true[:,:,1]).astype(np.uint8) skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1) - prediction_true[:,:,0][skeleton_art==1]=2 - - if thresholding_for_some_classes_in_light_version: - kernel_min = np.ones((3, 3), np.uint8) - prediction_true[:,:,0][prediction_true[:,:,0]==4] = 0 - - skeleton_art = skeletonize(prediction_true[:,:,1]).astype(np.uint8) - skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1) - - prediction_true[:,:,0][skeleton_art==1]=4 + prediction_true[:,:,0][skeleton_art==1]=artificial_class gc.collect() return prediction_true, confidence_matrix @@ -1124,11 +1096,11 @@ class Eynollah: img_width_h = img.shape[1] model_region = self.model_zoo.get("region_fl") if patches else self.model_zoo.get("region_fl_np") - thresholding_for_fl_light_version = True + thresholding_for_heading = True if not patches: img = otsu_copy_binary(img).astype(np.uint8) prediction_regions = None - thresholding_for_fl_light_version = False + thresholding_for_heading = False elif cols: img = otsu_copy_binary(img).astype(np.uint8) if cols == 1: @@ -1147,7 +1119,7 @@ class Eynollah: prediction_regions = self.do_prediction(patches, img, model_region, 
marginal_of_patch_percent=0.1, n_batch_inference=3, - thresholding_for_fl_light_version=thresholding_for_fl_light_version) + thresholding_for_heading=thresholding_for_heading) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions @@ -1305,7 +1277,7 @@ class Eynollah: prediction_textline = self.do_prediction(use_patches, img, self.model_zoo.get("textline"), marginal_of_patch_percent=0.15, n_batch_inference=3, - threshold_art_class_textline=self.threshold_art_class_textline) + threshold_art_class=self.threshold_art_class_textline) prediction_textline = resize_image(prediction_textline, img_h, img_w) textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 @@ -1399,15 +1371,15 @@ class Eynollah: img_resized.shape[1], img_resized.shape[0], num_col_classifier) prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( True, img_resized, self.model_zoo.get("region_1_2"), n_batch_inference=1, - thresholding_for_some_classes_in_light_version=True, - threshold_art_class_layout=self.threshold_art_class_layout) + thresholding_for_artificial_class=True, + threshold_art_class=self.threshold_art_class_layout) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1])) prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept( False, self.image_page_org_size, self.model_zoo.get("region_1_2"), n_batch_inference=1, - thresholding_for_artificial_class_in_light_version=True, - threshold_art_class_layout=self.threshold_art_class_layout) + thresholding_for_artificial_class=True, + threshold_art_class=self.threshold_art_class_layout) ys = slice(*self.page_coord[0:2]) xs = slice(*self.page_coord[2:4]) prediction_regions_org[ys, xs] = prediction_regions_page @@ -1420,11 +1392,11 @@ class Eynollah: 
img_resized.shape[1], img_resized.shape[0], new_h, num_col_classifier) prediction_regions_org, confidence_matrix = self.do_prediction_new_concept( True, img_resized, self.model_zoo.get("region_1_2"), n_batch_inference=2, - thresholding_for_some_classes_in_light_version=True, - threshold_art_class_layout=self.threshold_art_class_layout) + thresholding_for_artificial_class=True, + threshold_art_class=self.threshold_art_class_layout) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_zoo.get_model("region"), ###n_batch_inference=3, - ###thresholding_for_some_classes_in_light_version=True) + ###thresholding_for_some_classes=True) #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show()