do_prediction*: avoid 3-channel results, simplify further…

- `do_prediction/_new_concept`: avoid unnecessary `np.repeat`
  on results, aggregate intermediate artificial class mask and
  confidence data in extra arrays
- callers: avoid unnecessary thresholding the result arrays
- callers: adapt (no need to slice into channels)
- simplify by refactoring thresholding and skeletonization into
  function `seg_mask_label`
- `extract_text_regions*`: drop unused second result array
- `textline_contours`: avoid calculating unused unpatched prediction
This commit is contained in:
Robert Sachunsky 2026-03-03 01:20:16 +01:00
parent ff7dc31a68
commit 3370a3aa85
3 changed files with 173 additions and 182 deletions

View file

@ -35,7 +35,6 @@ import numpy as np
import shapely.affinity
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from skimage.morphology import skeletonize
from ocrd_utils import tf_disable_interactive_logs
import statistics
@ -86,6 +85,7 @@ from .utils import (
box2rect,
find_num_col,
otsu_copy_binary,
seg_mask_label,
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
check_any_text_region_in_model_one_is_main_or_header_light,
small_textlines_to_parent_adherence2,
@ -474,7 +474,7 @@ class Eynollah:
if self.input_binary:
img = self.imread()
prediction_bin = self.do_prediction(True, img, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
prediction_bin = 255 * (prediction_bin == 0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
img= np.copy(prediction_bin)
img_bin = prediction_bin
@ -623,7 +623,8 @@ class Eynollah:
def do_prediction(
self, patches, img, model,
n_batch_inference=1, marginal_of_patch_percent=0.1,
n_batch_inference=1,
marginal_of_patch_percent=0.1,
thresholding_for_some_classes=False,
thresholding_for_heading=False,
thresholding_for_artificial_class=False,
@ -638,26 +639,24 @@ class Eynollah:
if not patches:
img_h_page = img.shape[0]
img_w_page = img.shape[1]
img = img / float(255.0)
img = img / 255.0
img = resize_image(img, img_height_model, img_width_model)
label_p_pred = model.predict(img[np.newaxis], verbose=0)[0]
seg = np.argmax(label_p_pred, axis=2)
if thresholding_for_artificial_class:
seg_art = label_p_pred[:, :, artificial_class]
seg_art = (seg_art >= threshold_art_class).astype(int)
seg[skeletonize(seg_art)] = artificial_class
seg_mask_label(
seg, label_p_pred[:, :, artificial_class] >= threshold_art_class,
label=artificial_class,
skeletonize=True)
if thresholding_for_heading:
seg_header = label_p_pred[:, :, 2]
seg_mask_label(
seg, label_p_pred[:, :, 2] >= 0.2,
label=2)
seg[seg_header >= 0.2] = 2
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
return prediction_true
return resize_image(seg, img_h_page, img_w_page).astype(np.uint8)
if img.shape[0] < img_height_model:
img = resize_image(img, img_height_model, img.shape[1])
@ -672,8 +671,9 @@ class Eynollah:
#img = img.astype(np.float16)
img_h = img.shape[0]
img_w = img.shape[1]
prediction_true = np.zeros((img_h, img_w, 3))
mask_true = np.zeros((img_h, img_w))
prediction = np.zeros((img_h, img_w), dtype=np.uint8)
if thresholding_for_artificial_class:
mask_artificial_class = np.zeros((img_h, img_w), dtype=bool)
nxf = math.ceil(img_w / float(width_mid))
nyf = math.ceil(img_h / float(height_mid))
@ -684,212 +684,6 @@ class Eynollah:
list_y_u = []
list_y_d = []
batch_indexer = 0
img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
for i in range(nxf):
for j in range(nyf):
index_x_d = i * width_mid
index_x_u = index_x_d + img_width_model
index_y_d = j * height_mid
index_y_u = index_y_d + img_height_model
if index_x_u > img_w:
index_x_u = img_w
index_x_d = img_w - img_width_model
if index_y_u > img_h:
index_y_u = img_h
index_y_d = img_h - img_height_model
list_i_s.append(i)
list_j_s.append(j)
list_x_u.append(index_x_u)
list_x_d.append(index_x_d)
list_y_d.append(index_y_d)
list_y_u.append(index_y_u)
img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
batch_indexer += 1
if (batch_indexer == n_batch_inference or
# last batch
i == nxf - 1 and j == nyf - 1):
self.logger.debug("predicting patches on %s", str(img_patch.shape))
label_p_pred = model.predict(img_patch, verbose=0)
seg = np.argmax(label_p_pred, axis=3)
if thresholding_for_some_classes:
seg_not_base = label_p_pred[:,:,:,4]
seg_not_base = (seg_not_base > 0.03).astype(int)
seg_line = label_p_pred[:,:,:,3]
seg_line = (seg_line > 0.1).astype(int)
seg_background = label_p_pred[:,:,:,0]
seg_background = (seg_background > 0.25).astype(int)
seg[seg_not_base==1]=4
seg[seg_background==1]=0
seg[(seg_line==1) & (seg==0)]=3
if thresholding_for_artificial_class:
seg_art = label_p_pred[:, :, :, artificial_class]
seg_art = (seg_art >= threshold_art_class).astype(int)
##seg[seg_art==1]=artificial_class
indexer_inside_batch = 0
for i_batch, j_batch in zip(list_i_s, list_j_s):
seg_in = seg[indexer_inside_batch]
if thresholding_for_artificial_class:
seg_in_art = seg_art[indexer_inside_batch]
index_y_u_in = list_y_u[indexer_inside_batch]
index_y_d_in = list_y_d[indexer_inside_batch]
index_x_u_in = list_x_u[indexer_inside_batch]
index_x_d_in = list_x_d[indexer_inside_batch]
where = np.index_exp[index_y_d_in:index_y_u_in,
index_x_d_in:index_x_u_in]
if (i_batch == 0 and
j_batch == 0):
inbox = np.index_exp[0:-margin or None,
0:-margin or None]
elif (i_batch == nxf - 1 and
j_batch == nyf - 1):
inbox = np.index_exp[margin:,
margin:]
elif (i_batch == 0 and
j_batch == nyf - 1):
inbox = np.index_exp[margin:,
0:-margin or None]
elif (i_batch == nxf - 1 and
j_batch == 0):
inbox = np.index_exp[0:-margin or None,
margin:]
elif (i_batch == 0 and
j_batch != 0 and
j_batch != nyf - 1):
inbox = np.index_exp[margin:-margin or None,
0:-margin or None]
elif (i_batch == nxf - 1 and
j_batch != 0 and
j_batch != nyf - 1):
inbox = np.index_exp[margin:-margin or None,
margin:]
elif (i_batch != 0 and
i_batch != nxf - 1 and
j_batch == 0):
inbox = np.index_exp[0:-margin or None,
margin:-margin or None]
elif (i_batch != 0 and
i_batch != nxf - 1 and
j_batch == nyf - 1):
inbox = np.index_exp[margin:,
margin:-margin or None]
else:
inbox = np.index_exp[margin:-margin or None,
margin:-margin or None]
prediction_true[where][inbox] = seg_in[inbox + (np.newaxis,)]
if thresholding_for_artificial_class:
prediction_true[where][inbox + (1,)] = seg_in_art[inbox]
indexer_inside_batch += 1
list_i_s = []
list_j_s = []
list_x_u = []
list_x_d = []
list_y_u = []
list_y_d = []
batch_indexer = 0
img_patch[:] = 0
prediction_true = prediction_true.astype(np.uint8)
if thresholding_for_artificial_class:
kernel_min = np.ones((3, 3), np.uint8)
prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0
skeleton_art = skeletonize(prediction_true[:,:,1]).astype(np.uint8)
skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1)
prediction_true[:,:,0][skeleton_art==1]=artificial_class
#del model
gc.collect()
return prediction_true
def do_prediction_new_concept(
self, patches, img, model,
n_batch_inference=1, marginal_of_patch_percent=0.1,
thresholding_for_artificial_class=False,
threshold_art_class=0.1,
artificial_class=4,
):
self.logger.debug("enter do_prediction_new_concept")
img_height_model = model.layers[-1].output_shape[1]
img_width_model = model.layers[-1].output_shape[2]
if not patches:
img_h_page = img.shape[0]
img_w_page = img.shape[1]
img = img / 255.0
img = resize_image(img, img_height_model, img_width_model)
label_p_pred = model.predict(img[np.newaxis], verbose=0)[0]
seg = np.argmax(label_p_pred, axis=2)
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
if thresholding_for_artificial_class:
kernel_min = np.ones((3, 3), np.uint8)
seg_art = label_p_pred[:, :, artificial_class]
seg_art = (seg_art >= threshold_art_class).astype(int)
#seg[seg_art==1]=4
seg_art = resize_image(seg_art, img_h_page, img_w_page).astype(np.uint8)
prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0
skeleton_art = skeletonize(seg_art).astype(np.uint8)
skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1)
prediction_true[:,:,0][skeleton_art==1] = artificial_class
seg_text = resize_image(label_p_pred[:, :, 1] , img_h_page, img_w_page)
return prediction_true, seg_text
if img.shape[0] < img_height_model:
img = resize_image(img, img_height_model, img.shape[1])
if img.shape[1] < img_width_model:
img = resize_image(img, img.shape[0], img_width_model)
self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
margin = int(marginal_of_patch_percent * img_height_model)
width_mid = img_width_model - 2 * margin
height_mid = img_height_model - 2 * margin
img = img / 255.0
img = img.astype(np.float16)
img_h = img.shape[0]
img_w = img.shape[1]
prediction_true = np.zeros((img_h, img_w, 3))
confidence_matrix = np.zeros((img_h, img_w))
mask_true = np.zeros((img_h, img_w))
nxf = img_w / float(width_mid)
nyf = img_h / float(height_mid)
nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)
list_i_s = []
list_j_s = []
list_x_u = []
list_x_d = []
list_y_u = []
list_y_d = []
batch_indexer = 0
img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
for i in range(nxf):
@ -923,9 +717,19 @@ class Eynollah:
label_p_pred = model.predict(img_patch, verbose=0)
seg = np.argmax(label_p_pred, axis=3)
if thresholding_for_some_classes:
seg_mask_label(
seg, label_p_pred[:,:,:,4] > 0.03,
label=4) #
seg_mask_label(
seg, label_p_pred[:,:,:,0] > 0.25,
label=0) # bg
seg_mask_label(
seg, label_p_pred[:,:,:,3] > 0.10 & seg == 0,
label=3) # line
if thresholding_for_artificial_class:
seg_art = label_p_pred[:, :, :, artificial_class]
seg_art = (seg_art >= threshold_art_class).astype(int)
seg_art = label_p_pred[:, :, :, artificial_class] >= threshold_art_class
indexer_inside_batch = 0
for i_batch, j_batch in zip(list_i_s, list_j_s):
@ -981,14 +785,193 @@ class Eynollah:
else:
inbox = np.index_exp[margin:-margin or None,
margin:-margin or None]
prediction_true[where][inbox] = seg_in[inbox + (np.newaxis,)]
confidence_matrix[where][inbox] = label_p_pred[(0,) + inbox + (1,)]
# rs: why is prediction_true 3ch when only 1st gets used?
# artificial boundary class map should be extra array
prediction[where][inbox] = seg_in[inbox]
if thresholding_for_artificial_class:
mask_artificial_class[where][inbox] = seg_in_art[inbox]
indexer_inside_batch += 1
list_i_s = []
list_j_s = []
list_x_u = []
list_x_d = []
list_y_u = []
list_y_d = []
batch_indexer = 0
img_patch[:] = 0
if thresholding_for_artificial_class:
seg_mask_label(prediction, mask_artificial_class,
label=artificial_class,
only=True,
skeletonize=True,
dilate=3)
gc.collect()
return prediction
def do_prediction_new_concept(
self, patches, img, model,
n_batch_inference=1,
marginal_of_patch_percent=0.1,
thresholding_for_artificial_class=False,
threshold_art_class=0.1,
artificial_class=4,
):
self.logger.debug("enter do_prediction_new_concept (patches=%d)", patches)
img_height_model = model.layers[-1].output_shape[1]
img_width_model = model.layers[-1].output_shape[2]
img = img / 255.0
img = img.astype(np.float16)
if not patches:
img_h_page = img.shape[0]
img_w_page = img.shape[1]
img = resize_image(img, img_height_model, img_width_model)
label_p_pred = model.predict(img[np.newaxis], verbose=0)[0]
seg = np.argmax(label_p_pred, axis=2)
prediction = resize_image(seg, img_h_page, img_w_page).astype(np.uint8)
if thresholding_for_artificial_class:
mask = resize_image(label_p_pred[:, :, artificial_class],
img_h_page, img_w_page) >= threshold_art_class
seg_mask_label(prediction, mask,
label=artificial_class,
only=True,
skeletonize=True,
dilate=3)
conf_text = resize_image(label_p_pred[:, :, 1], img_h_page, img_w_page)
return prediction, conf_text
if img.shape[0] < img_height_model:
img = resize_image(img, img_height_model, img.shape[1])
if img.shape[1] < img_width_model:
img = resize_image(img, img.shape[0], img_width_model)
self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
margin = int(marginal_of_patch_percent * img_height_model)
width_mid = img_width_model - 2 * margin
height_mid = img_height_model - 2 * margin
img_h = img.shape[0]
img_w = img.shape[1]
prediction = np.zeros((img_h, img_w), dtype=np.uint8)
confidence = np.zeros((img_h, img_w))
if thresholding_for_artificial_class:
mask_artificial_class = np.zeros((img_h, img_w), dtype=bool)
nxf = math.ceil(img_w / float(width_mid))
nyf = math.ceil(img_h / float(height_mid))
list_i_s = []
list_j_s = []
list_x_u = []
list_x_d = []
list_y_u = []
list_y_d = []
batch_indexer = 0
img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
for i in range(nxf):
for j in range(nyf):
index_x_d = i * width_mid
index_x_u = index_x_d + img_width_model
index_y_d = j * height_mid
index_y_u = index_y_d + img_height_model
if index_x_u > img_w:
index_x_u = img_w
index_x_d = img_w - img_width_model
if index_y_u > img_h:
index_y_u = img_h
index_y_d = img_h - img_height_model
list_i_s.append(i)
list_j_s.append(j)
list_x_u.append(index_x_u)
list_x_d.append(index_x_d)
list_y_d.append(index_y_d)
list_y_u.append(index_y_u)
img_patch[batch_indexer] = img[index_y_d:index_y_u,
index_x_d:index_x_u]
batch_indexer += 1
if (batch_indexer == n_batch_inference or
# last batch
i == nxf - 1 and j == nyf - 1):
self.logger.debug("predicting patches on %s", str(img_patch.shape))
label_p_pred = model.predict(img_patch,verbose=0)
seg = np.argmax(label_p_pred, axis=3)
conf = label_p_pred[:, :, :, 1]
if thresholding_for_artificial_class:
seg_art = label_p_pred[:, :, :, artificial_class] >= threshold_art_class
indexer_inside_batch = 0
for i_batch, j_batch in zip(list_i_s, list_j_s):
seg_in = seg[indexer_inside_batch]
conf_in = conf[indexer_inside_batch]
if thresholding_for_artificial_class:
seg_in_art = seg_art[indexer_inside_batch]
index_y_u_in = list_y_u[indexer_inside_batch]
index_y_d_in = list_y_d[indexer_inside_batch]
index_x_u_in = list_x_u[indexer_inside_batch]
index_x_d_in = list_x_d[indexer_inside_batch]
where = np.index_exp[index_y_d_in:index_y_u_in,
index_x_d_in:index_x_u_in]
if (i_batch == 0 and
j_batch == 0):
inbox = np.index_exp[0:-margin or None,
0:-margin or None]
elif (i_batch == nxf - 1 and
j_batch == nyf - 1):
inbox = np.index_exp[margin:,
margin:]
elif (i_batch == 0 and
j_batch == nyf - 1):
inbox = np.index_exp[margin:,
0:-margin or None]
elif (i_batch == nxf - 1 and
j_batch == 0):
inbox = np.index_exp[0:-margin or None,
margin:]
elif (i_batch == 0 and
j_batch != 0 and
j_batch != nyf - 1):
inbox = np.index_exp[margin:-margin or None,
0:-margin or None]
elif (i_batch == nxf - 1 and
j_batch != 0 and
j_batch != nyf - 1):
inbox = np.index_exp[margin:-margin or None,
margin:]
elif (i_batch != 0 and
i_batch != nxf - 1 and
j_batch == 0):
inbox = np.index_exp[0:-margin or None,
margin:-margin or None]
elif (i_batch != 0 and
i_batch != nxf - 1 and
j_batch == nyf - 1):
inbox = np.index_exp[margin:,
margin:-margin or None]
else:
inbox = np.index_exp[margin:-margin or None,
margin:-margin or None]
prediction[where][inbox] = seg_in[inbox]
confidence[where][inbox] = conf_in[inbox]
# rs: why does confidence_matrix only get text-label scores?
# should be scores at final argmax
if thresholding_for_artificial_class:
prediction_true[where][inbox + (1,)] = seg_in_art[inbox]
mask_artificial_class[where][inbox] = seg_in_art[inbox]
indexer_inside_batch += 1
@ -1002,29 +985,25 @@ class Eynollah:
batch_indexer = 0
img_patch[:] = 0
prediction_true = prediction_true.astype(np.uint8)
if thresholding_for_artificial_class:
kernel_min = np.ones((3, 3), np.uint8)
prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0
skeleton_art = skeletonize(prediction_true[:,:,1]).astype(np.uint8)
skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1)
prediction_true[:,:,0][skeleton_art==1]=artificial_class
seg_mask_label(prediction, mask_artificial_class,
label=artificial_class,
only=True,
skeletonize=True,
dilate=3)
gc.collect()
return prediction, confidence
gc.collect()
return prediction_true, confidence_matrix
def extract_page(self):
self.logger.debug("enter extract_page")
cont_page = []
if not self.ignore_page_extraction:
img = np.copy(self.image)#cv2.GaussianBlur(self.image, (5, 5), 0)
img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0)
##thresh = cv2.dilate(thresh, KERNEL, iterations=3)
contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
contours, _ = cv2.findContours(prediction, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
if len(contours)>0:
cnt_size = np.array([cv2.contourArea(contours[j])
@ -1068,12 +1047,9 @@ class Eynollah:
else:
img = self.imread()
img = cv2.GaussianBlur(img, (5, 5), 0)
img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0)
thresh = cv2.dilate(thresh, KERNEL, iterations=3)
contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
prediction = cv2.dilate(prediction, KERNEL, iterations=3)
contours, _ = cv2.findContours(prediction, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
if len(contours)>0:
cnt_size = np.array([cv2.contourArea(contours[j])
for j in range(len(contours))])
@ -1122,7 +1098,7 @@ class Eynollah:
thresholding_for_heading=thresholding_for_heading)
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
self.logger.debug("exit extract_text_regions")
return prediction_regions, prediction_regions
return prediction_regions
def extract_text_regions(self, img, patches, cols):
self.logger.debug("enter extract_text_regions")
@ -1133,7 +1109,7 @@ class Eynollah:
prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1)
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
self.logger.debug("exit extract_text_regions")
return prediction_regions, None
return prediction_regions
def get_textlines_of_a_textregion_sorted(self, textlines_textregion, cx_textline, cy_textline, w_h_textline):
N = len(cy_textline)
@ -1280,26 +1256,15 @@ class Eynollah:
threshold_art_class=self.threshold_art_class_textline)
prediction_textline = resize_image(prediction_textline, img_h, img_w)
textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1
old_art = np.copy(textline_mask_tot_ea_art)
#prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline"))
#prediction_textline_longshot = resize_image(prediction_textline_longshot, img_h, img_w)
textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1
textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8')
prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1
#cv2.imwrite('prediction_textline2.png', prediction_textline[:,:,0])
prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline"))
prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)
#cv2.imwrite('prediction_textline.png', prediction_textline[:,:,0])
#sys.exit()
self.logger.debug('exit textline_contours')
return ((prediction_textline[:, :, 0]==1).astype(np.uint8),
(prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8))
return ((prediction_textline==1).astype(np.uint8),
#(prediction_textline_longshot==1).astype(np.uint8),
None
)
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
self.logger.debug("enter get_regions_light_v")
@ -1334,7 +1299,6 @@ class Eynollah:
###prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get_model("binarization"), n_batch_inference=5)
####print("inside bin ", time.time()-t_bin)
###prediction_bin=prediction_bin[:,:,0]
###prediction_bin = (prediction_bin[:,:]==0)*1
###prediction_bin = prediction_bin*255
@ -1374,7 +1338,7 @@ class Eynollah:
thresholding_for_artificial_class=True,
threshold_art_class=self.threshold_art_class_layout)
else:
prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3))
prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept(
False, self.image_page_org_size, self.model_zoo.get("region_1_2"), n_batch_inference=1,
@ -1398,13 +1362,12 @@ class Eynollah:
###n_batch_inference=3,
###thresholding_for_some_classes=True)
#print("inside 3 ", time.time()-t_in)
#plt.imshow(prediction_regions_org[:,:,0])
#plt.imshow(prediction_regions_org[:,:])
#plt.show()
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
confidence_matrix = resize_image(confidence_matrix, img_height_h, img_width_h )
img_bin = resize_image(img_bin, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0]
mask_seps_only = (prediction_regions_org[:,:] == 3)*1
mask_texts_only = (prediction_regions_org[:,:] ==1)*1
@ -1795,7 +1758,7 @@ class Eynollah:
patches = False
prediction_table, _ = self.do_prediction_new_concept(patches, img, self.model_zoo.get("table"))
prediction_table = prediction_table.astype(np.int16)
return prediction_table[:,:,0]
return prediction_table
def run_graphics_and_columns_light(
self, text_regions_p_1, textline_mask_tot_ea,
@ -1972,12 +1935,12 @@ class Eynollah:
textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
table_prediction_n = rotate_image(table_prediction, slope_deskew)
regions_without_separators_d = (text_regions_p_d[:, :] == 1) * 1
regions_without_separators_d = (text_regions_p_d == 1) * 1
if self.tables:
regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
regions_without_separators = (text_regions_p[:, :] == 1) * 1
# ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1
#self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
regions_without_separators_d[table_prediction_n == 1] = 1
regions_without_separators = (text_regions_p == 1) * 1
# ( (text_regions_p==1) | (text_regions_p==2) )*1
#self.return_regions_without_separators_new(text_regions_p,img_only_regions)
#print(time.time()-t_0_box,'time box in 1')
if self.tables:
regions_without_separators[table_prediction ==1 ] = 1
@ -1999,10 +1962,10 @@ class Eynollah:
if not erosion_hurts:
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
regions_without_separators = regions_without_separators.astype(np.uint8)
regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
regions_without_separators = cv2.erode(regions_without_separators, KERNEL, iterations=6)
else:
regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
regions_without_separators_d = cv2.erode(regions_without_separators_d, KERNEL, iterations=6)
#print(time.time()-t_0_box,'time box in 3')
t1 = time.time()
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
@ -2026,14 +1989,11 @@ class Eynollah:
if self.tables:
text_regions_p[table_prediction == 1] = 10
img_revised_tab = text_regions_p[:, :]
else:
img_revised_tab = text_regions_p[:,:]
#img_revised_tab = text_regions_p[:, :]
polygons_of_images = return_contours_of_interested_region(text_regions_p, 2)
label_marginalia = 4
min_area_mar = 0.00001
marginal_mask = (text_regions_p[:,:]==label_marginalia)*1
marginal_mask = (text_regions_p==label_marginalia)*1
marginal_mask = marginal_mask.astype('uint8')
marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
@ -2093,7 +2053,7 @@ class Eynollah:
image_page = image_page.astype(np.uint8)
#print("full inside 1", time.time()- t_full0)
regions_fully, regions_fully_only_drop = self.extract_text_regions_new(
regions_fully = self.extract_text_regions_new(
img_bin_light,
False, cols=num_col_classifier)
#print("full inside 2", time.time()- t_full0)
@ -2103,36 +2063,33 @@ class Eynollah:
# the separators in full layout will not be written on layout
if not self.reading_order_machine_based:
text_regions_p[:,:][regions_fully[:,:,0]==5]=6
###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4
text_regions_p[regions_fully==5]=6
#text_regions_p[:,:][regions_fully[:,:,0]==6]=6
##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p)
##regions_fully[:, :, 0][regions_fully_only_drop[:, :] == 4] = 4
#text_regions_p[:,:][regions_fully[:,:]==6]=6
drop_capital_label_in_full_layout_model = 3
drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1
drops = (regions_fully==drop_capital_label_in_full_layout_model)*1
drops= drops.astype(np.uint8)
regions_fully[:,:,0][regions_fully[:,:,0]==drop_capital_label_in_full_layout_model] = 1
regions_fully[regions_fully==drop_capital_label_in_full_layout_model] = 1
drops = cv2.erode(drops[:,:], KERNEL, iterations=1)
regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model
drops = cv2.erode(drops, KERNEL, iterations=1)
regions_fully[drops==1] = drop_capital_label_in_full_layout_model
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(
regions_fully, drop_capital_label_in_full_layout_model, text_regions_p)
##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
##regions_fully_np = self.extract_text_regions(image_page, False, cols=num_col_classifier)
##if num_col_classifier > 2:
##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
##regions_fully_np[regions_fully_np == 4] = 0
##else:
##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)
###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully,
### regions_fully_np, img_only_regions)
# plt.imshow(regions_fully[:,:,0])
# plt.imshow(regions_fully)
# plt.show()
text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4
####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
text_regions_p[regions_fully == drop_capital_label_in_full_layout_model] = 4
####text_regions_p[regions_fully_np == 4] = 4
#plt.imshow(text_regions_p)
#plt.show()
####if not self.tables:
@ -2141,14 +2098,14 @@ class Eynollah:
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
regions_fully_n = rotate_image(regions_fully, slope_deskew)
if not self.tables:
regions_without_separators_d = (text_regions_p_d[:, :] == 1) * 1
regions_without_separators_d = (text_regions_p_d == 1) * 1
else:
text_regions_p_d = None
textline_mask_tot_d = None
regions_without_separators_d = None
if not self.tables:
regions_without_separators = (text_regions_p[:, :] == 1) * 1
img_revised_tab = np.copy(text_regions_p[:, :])
regions_without_separators = (text_regions_p == 1) * 1
img_revised_tab = np.copy(text_regions_p)
polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5)
self.logger.debug('exit run_boxes_full_layout')
@ -2853,7 +2810,7 @@ class Eynollah:
textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0
text_only = (img_revised_tab[:, :] == 1) * 1
text_only = (img_revised_tab == 1) * 1
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
text_only_d = ((text_regions_p_d[:, :] == 1)) * 1

View file

@ -13,6 +13,7 @@ from shapely import geometry
import cv2
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from skimage import morphology
from .is_nan import isNaN
from .contour import (contours_in_same_horizon,
@ -220,20 +221,20 @@ def find_features_of_lines(contours_main):
def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregion_pre_np, img_only_text):
textregion_pre_p_org = np.copy(textregion_pre_p)
# 4 is drop capitals
headers_in_longshot = textregion_pre_np[:, :, 0] == 2
#headers_in_longshot = ((textregion_pre_np[:,:,0]==2) |
# (textregion_pre_np[:,:,0]==1))
textregion_pre_p[:, :, 0][headers_in_longshot &
(textregion_pre_p[:, :, 0] != 4)] = 2
textregion_pre_p[:, :, 0][textregion_pre_p[:, :, 0] == 1] = 0
headers_in_longshot = textregion_pre_np == 2
#headers_in_longshot = ((textregion_pre_np==2) |
# (textregion_pre_np==1))
textregion_pre_p[headers_in_longshot &
(textregion_pre_p != 4)] = 2
textregion_pre_p[textregion_pre_p == 1] = 0
# earlier it was so, but by this manner the drop capitals are also deleted
# textregion_pre_p[:,:,0][(img_only_text[:,:]==1) &
# (textregion_pre_p[:,:,0]!=7) &
# (textregion_pre_p[:,:,0]!=2)] = 1
textregion_pre_p[:, :, 0][(img_only_text[:, :] == 1) &
(textregion_pre_p[:, :, 0] != 7) &
(textregion_pre_p[:, :, 0] != 4) &
(textregion_pre_p[:, :, 0] != 2)] = 1
# textregion_pre_p[(img_only_text[:,:]==1) &
# (textregion_pre_p!=7) &
# (textregion_pre_p!=2)] = 1
textregion_pre_p[(img_only_text[:, :] == 1) &
(textregion_pre_p != 7) &
(textregion_pre_p != 4) &
(textregion_pre_p != 2)] = 1
return textregion_pre_p
def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
@ -754,7 +755,7 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
return layout_no_patch
def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label, text_regions_p):
drop_only = (layout_in_patch[:, :, 0] == drop_capital_label) * 1
drop_only = (layout_in_patch == drop_capital_label) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
@ -772,7 +773,6 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
for jj in range(len(contours_drop_parent)):
x, y, w, h = cv2.boundingRect(contours_drop_parent[jj])
box = slice(y, y + h), slice(x, x + w)
box0 = box + (0,)
mask_of_drop_cpaital_in_early_layout = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1]))
mask_of_drop_cpaital_in_early_layout[box] = text_regions_p[box]
@ -783,12 +783,12 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels)
if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and
percent_text_to_all_in_drop >= 0.3):
layout_in_patch[box0] = drop_capital_label
layout_in_patch[box] = drop_capital_label
else:
layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = drop_capital_label
layout_in_patch[box0][layout_in_patch[box0] == 0] = drop_capital_label
layout_in_patch[box0][layout_in_patch[box0] == 4] = drop_capital_label# images
#layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = 1#drop_capital_label
mask = ((layout_in_patch[box] == drop_capital_label) |
(layout_in_patch[box] == 0) |
(layout_in_patch[box] == 4))
layout_in_patch[box][mask] = drop_capital_label
return layout_in_patch
@ -917,7 +917,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
all_pixels = (img == 255).sum()
pixels_header=((img == 255) &
(regions_model_full[:,:,0]==2)).sum()
(regions_model_full==2)).sum()
pixels_main = all_pixels - pixels_header
if (( pixels_header / float(pixels_main) >= 0.6 and
@ -1947,3 +1947,38 @@ def ensure_array(obj: Iterable) -> np.ndarray:
if not isinstance(obj, np.ndarray):
return np.fromiter(obj, object)
return obj
def seg_mask_label(segmap: np.ndarray,
                   mask: np.ndarray,
                   only: bool = False,
                   label: int = 2,
                   skeletonize: bool = False,
                   dilate: int = 0,
                   ) -> None:
    """
    Overwrite an existing segmentation map from a binary mask with a given label.

    Modifies ``segmap`` in place. Use this to enforce specific confidence
    thresholds or rules after segmentation.

    Args:
        segmap: integer array of existing segmentation labels ([H, W] or [B, H, W] shape)
        mask: boolean array for specific label (same shape as ``segmap``)
    Keyword Args:
        label: the class label to be written
        only: whether to suppress the `label` outside `mask` (i.e. clear all
            pre-existing occurrences of `label` before re-applying it)
        skeletonize: whether to transform the mask to its skeleton
        dilate: whether to also apply dilation after this (convolution with
            square kernel of given size)
    """
    if only:
        # Clear pre-existing occurrences of `label` even when `mask` is empty,
        # so that after the call the label only survives where the mask says so.
        # (This must happen before the empty-mask early return.)
        segmap[segmap == label] = 0
    if not mask.any():
        return
    if skeletonize:
        if mask.ndim == 3:
            # np.stack requires a sequence, not a generator expression
            mask = np.stack([morphology.skeletonize(m) for m in mask])
        else:
            mask = morphology.skeletonize(mask)
    if dilate:
        kernel = np.ones((dilate, dilate), np.uint8)
        mask = cv2.dilate(mask.astype(np.uint8), kernel, iterations=1) > 0
    segmap[mask] = label

View file

@ -501,7 +501,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
drop_only = (layout_no_patch[:, :, 0] == 4) * 1
drop_only = (layout_no_patch == 4) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
@ -529,9 +529,8 @@ def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15:
contours_drop_parent_final.append(contours_drop_parent[jj])
layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0
layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4))
layout_no_patch[layout_no_patch == 4] = 0
layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=4)
return layout_no_patch