mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-04-30 19:22:03 +02:00
mbro: simplify, add drop-caps as well, reduce batch size…
- do_order_of_regions_with_model:
* add `polygons_of_drop_capitals`, order these indices as well
(model was not trained for this, but it works)
* explicit label identifiers instead of number literals
* map marginals and images correctly
* simplify (a lot)
* reduce inference batch size to accommodate 8 GB VRAM GPUs
- return_indexes_of_contours_located_inside_another_list_of_contours:
simplify
This commit is contained in:
parent
0dfc9d911f
commit
de65a55a04
1 changed files with 121 additions and 168 deletions
|
|
@ -36,7 +36,6 @@ import numpy as np
|
|||
import shapely.affinity
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
import statistics
|
||||
|
||||
try:
|
||||
import matplotlib.pyplot as plt
|
||||
|
|
@ -1849,8 +1848,28 @@ class Eynollah:
|
|||
regions_without_separators,
|
||||
polygons_of_marginals, polygons_of_tables)
|
||||
|
||||
def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p):
|
||||
|
||||
def do_order_of_regions_with_model(
|
||||
self,
|
||||
contours_only_text_parent,
|
||||
contours_only_text_parent_h,
|
||||
# not trained on drops directly, but it does work:
|
||||
polygons_of_drop_capitals,
|
||||
text_regions_p,
|
||||
# input labels as in run_boxes_full_layout
|
||||
# output labels as in RO model's read_xml
|
||||
label_text=1,
|
||||
label_head=2,
|
||||
label_imgs=5,
|
||||
label_imgs_ro=4,
|
||||
label_seps=6,
|
||||
label_seps_ro=5,
|
||||
label_marg=8,
|
||||
label_marg_ro=3,
|
||||
label_drop=4,
|
||||
# no drop-capital in RO model, yet
|
||||
label_drop_ro=4,
|
||||
):
|
||||
# FIXME: use model.input_shape
|
||||
height1 =672#448
|
||||
width1 = 448#224
|
||||
|
||||
|
|
@ -1860,158 +1879,107 @@ class Eynollah:
|
|||
height3 =672#448
|
||||
width3 = 448#224
|
||||
|
||||
inference_bs = 3
|
||||
|
||||
ver_kernel = np.ones((5, 1), dtype=np.uint8)
|
||||
hor_kernel = np.ones((1, 5), dtype=np.uint8)
|
||||
|
||||
|
||||
min_cont_size_to_be_dilated = 10
|
||||
if len(contours_only_text_parent)>min_cont_size_to_be_dilated:
|
||||
if len(contours_only_text_parent) > min_cont_size_to_be_dilated:
|
||||
(cx_conts, cy_conts,
|
||||
x_min_conts, x_max_conts,
|
||||
y_min_conts, y_max_conts,
|
||||
_) = find_new_features_of_contours(contours_only_text_parent)
|
||||
args_cont_located = np.array(range(len(contours_only_text_parent)))
|
||||
cx_conts = ensure_array(cx_conts)
|
||||
cy_conts = ensure_array(cy_conts)
|
||||
contours_only_text_parent = ensure_array(contours_only_text_parent)
|
||||
args_cont = np.arange(len(contours_only_text_parent))
|
||||
|
||||
diff_y_conts = np.abs(y_max_conts[:]-y_min_conts)
|
||||
diff_x_conts = np.abs(x_max_conts[:]-x_min_conts)
|
||||
mean_x = np.mean(diff_x_conts)
|
||||
diff_x_ratio = diff_x_conts / mean_x
|
||||
|
||||
mean_x = statistics.mean(diff_x_conts)
|
||||
median_x = statistics.median(diff_x_conts)
|
||||
args_cont_excluded = args_cont[diff_x_ratio >= 1.3]
|
||||
args_cont_included = args_cont[diff_x_ratio < 1.3]
|
||||
|
||||
|
||||
diff_x_ratio= diff_x_conts/mean_x
|
||||
|
||||
args_cont_located_excluded = args_cont_located[diff_x_ratio>=1.3]
|
||||
args_cont_located_included = args_cont_located[diff_x_ratio<1.3]
|
||||
|
||||
contours_only_text_parent_excluded = [contours_only_text_parent[ind]
|
||||
#contours_only_text_parent[diff_x_ratio>=1.3]
|
||||
for ind in range(len(contours_only_text_parent))
|
||||
if diff_x_ratio[ind]>=1.3]
|
||||
contours_only_text_parent_included = [contours_only_text_parent[ind]
|
||||
#contours_only_text_parent[diff_x_ratio<1.3]
|
||||
for ind in range(len(contours_only_text_parent))
|
||||
if diff_x_ratio[ind]<1.3]
|
||||
|
||||
cx_conts_excluded = [cx_conts[ind]
|
||||
#cx_conts[diff_x_ratio>=1.3]
|
||||
for ind in range(len(cx_conts))
|
||||
if diff_x_ratio[ind]>=1.3]
|
||||
cx_conts_included = [cx_conts[ind]
|
||||
#cx_conts[diff_x_ratio<1.3]
|
||||
for ind in range(len(cx_conts))
|
||||
if diff_x_ratio[ind]<1.3]
|
||||
cy_conts_excluded = [cy_conts[ind]
|
||||
#cy_conts[diff_x_ratio>=1.3]
|
||||
for ind in range(len(cy_conts))
|
||||
if diff_x_ratio[ind]>=1.3]
|
||||
cy_conts_included = [cy_conts[ind]
|
||||
#cy_conts[diff_x_ratio<1.3]
|
||||
for ind in range(len(cy_conts))
|
||||
if diff_x_ratio[ind]<1.3]
|
||||
|
||||
#print(diff_x_ratio, 'ratio')
|
||||
text_regions_p = text_regions_p.astype('uint8')
|
||||
|
||||
if len(contours_only_text_parent_excluded)>0:
|
||||
textregion_par = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1])).astype('uint8')
|
||||
textregion_par = cv2.fillPoly(textregion_par, pts=contours_only_text_parent_included, color=(1,1))
|
||||
if len(args_cont_excluded):
|
||||
textregion_par = np.zeros_like(text_regions_p)
|
||||
textregion_par = cv2.fillPoly(textregion_par,
|
||||
pts=contours_only_text_parent[args_cont_included],
|
||||
color=1)
|
||||
else:
|
||||
textregion_par = (text_regions_p[:,:]==1)*1
|
||||
textregion_par = textregion_par.astype('uint8')
|
||||
textregion_par = (text_regions_p == 1).astype(np.uint8)
|
||||
|
||||
text_regions_p_textregions_dilated = cv2.erode(textregion_par , hor_kernel, iterations=2)
|
||||
text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=4)
|
||||
text_regions_p_textregions_dilated = cv2.erode(text_regions_p_textregions_dilated , hor_kernel, iterations=1)
|
||||
text_regions_p_textregions_dilated = cv2.dilate(text_regions_p_textregions_dilated , ver_kernel, iterations=5)
|
||||
text_regions_p_textregions_dilated[text_regions_p[:,:]>1] = 0
|
||||
textregion_par = cv2.erode(textregion_par, hor_kernel, iterations=2)
|
||||
textregion_par = cv2.dilate(textregion_par, ver_kernel, iterations=4)
|
||||
textregion_par = cv2.erode(textregion_par, hor_kernel, iterations=1)
|
||||
textregion_par = cv2.dilate(textregion_par, ver_kernel, iterations=5)
|
||||
textregion_par[text_regions_p > 1] = 0
|
||||
|
||||
|
||||
contours_only_dilated, hir_on_text_dilated = return_contours_of_image(text_regions_p_textregions_dilated)
|
||||
contours_only_dilated, hir_on_text_dilated = return_contours_of_image(textregion_par)
|
||||
contours_only_dilated = return_parent_contours(contours_only_dilated, hir_on_text_dilated)
|
||||
|
||||
indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located = \
|
||||
indexes_of_located_cont, _, cy_of_located = \
|
||||
self.return_indexes_of_contours_located_inside_another_list_of_contours(
|
||||
contours_only_dilated, contours_only_text_parent_included,
|
||||
cx_conts_included, cy_conts_included, args_cont_located_included)
|
||||
contours_only_dilated,
|
||||
cx_conts[args_cont_included],
|
||||
cy_conts[args_cont_included],
|
||||
args_cont_included)
|
||||
|
||||
indexes_of_located_cont.extend(args_cont_excluded[:, np.newaxis])
|
||||
contours_only_dilated.extend(contours_only_text_parent[args_cont_excluded])
|
||||
|
||||
if len(args_cont_located_excluded)>0:
|
||||
for ind in args_cont_located_excluded:
|
||||
indexes_of_located_cont.append(np.array([ind]))
|
||||
contours_only_dilated.append(contours_only_text_parent[ind])
|
||||
center_y_coordinates_of_located.append(0)
|
||||
missing_textregions = np.setdiff1d(args_cont, np.concatenate(indexes_of_located_cont))
|
||||
|
||||
array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont]
|
||||
flattened_array = np.concatenate([arr.ravel() for arr in array_list])
|
||||
#print(len( np.unique(flattened_array)), 'indexes_of_located_cont uniques')
|
||||
indexes_of_located_cont.extend(missing_textregions[:, np.newaxis])
|
||||
contours_only_dilated.extend(contours_only_text_parent[missing_textregions])
|
||||
|
||||
missing_textregions = list( set(range(len(contours_only_text_parent))) - set(flattened_array) )
|
||||
#print(missing_textregions, 'missing_textregions')
|
||||
args_cont_h = np.arange(len(contours_only_text_parent_h))
|
||||
indexes_of_located_cont.extend(args_cont_h[:, np.newaxis] +
|
||||
len(contours_only_text_parent))
|
||||
|
||||
for ind in missing_textregions:
|
||||
indexes_of_located_cont.append(np.array([ind]))
|
||||
contours_only_dilated.append(contours_only_text_parent[ind])
|
||||
center_y_coordinates_of_located.append(0)
|
||||
args_cont_drop = np.arange(len(polygons_of_drop_capitals))
|
||||
indexes_of_located_cont.extend(args_cont_drop[:, np.newaxis] +
|
||||
len(contours_only_text_parent) +
|
||||
len(contours_only_text_parent_h))
|
||||
|
||||
|
||||
if contours_only_text_parent_h:
|
||||
for vi in range(len(contours_only_text_parent_h)):
|
||||
indexes_of_located_cont.append(int(vi+len(contours_only_text_parent)))
|
||||
|
||||
array_list = [np.array([elem]) if isinstance(elem, int) else elem for elem in indexes_of_located_cont]
|
||||
flattened_array = np.concatenate([arr.ravel() for arr in array_list])
|
||||
|
||||
y_len = text_regions_p.shape[0]
|
||||
x_len = text_regions_p.shape[1]
|
||||
|
||||
img_poly = np.zeros((y_len,x_len), dtype='uint8')
|
||||
img_poly[text_regions_p[:,:]==1] = 1
|
||||
img_poly[text_regions_p[:,:]==2] = 2
|
||||
img_poly[text_regions_p[:,:]==3] = 4
|
||||
img_poly[text_regions_p[:,:]==6] = 5
|
||||
|
||||
img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8')
|
||||
if contours_only_text_parent_h:
|
||||
_, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(
|
||||
contours_only_text_parent_h)
|
||||
for j in range(len(cy_main)):
|
||||
img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,
|
||||
int(x_min_main[j]):int(x_max_main[j])] = 1
|
||||
co_text_all_org = contours_only_text_parent + contours_only_text_parent_h
|
||||
if len(contours_only_text_parent)>min_cont_size_to_be_dilated:
|
||||
co_text_all = contours_only_dilated + contours_only_text_parent_h
|
||||
else:
|
||||
co_text_all = contours_only_text_parent + contours_only_text_parent_h
|
||||
else:
|
||||
co_text_all_org = contours_only_text_parent
|
||||
if len(contours_only_text_parent)>min_cont_size_to_be_dilated:
|
||||
co_text_all = contours_only_dilated
|
||||
else:
|
||||
co_text_all = contours_only_text_parent
|
||||
co_text_all = list(contours_only_text_parent)
|
||||
|
||||
img_poly = np.zeros_like(text_regions_p)
|
||||
img_poly[text_regions_p == label_text] = label_text
|
||||
img_poly[text_regions_p == label_head] = label_head
|
||||
img_poly[text_regions_p == 3] = label_imgs # rs: ??
|
||||
img_poly[text_regions_p == label_imgs] = label_imgs_ro
|
||||
img_poly[text_regions_p == label_marg] = label_marg_ro
|
||||
img_poly[text_regions_p == label_seps] = label_seps_ro
|
||||
|
||||
img_header_and_sep = np.zeros_like(text_regions_p)
|
||||
for contour in contours_only_text_parent_h:
|
||||
# rs: why (max:max+12) instad of (min:max)?
|
||||
# what about actual seps?
|
||||
img_header_and_sep[contour[:, 0, 1].max(): contour[:, 0, 1].max() + 12,
|
||||
contour[:, 0, 0].min(): contour[:, 0, 0].max()] = 1
|
||||
co_text_all.extend(contours_only_text_parent_h)
|
||||
co_text_all.extend(polygons_of_drop_capitals)
|
||||
|
||||
if not len(co_text_all):
|
||||
return []
|
||||
|
||||
labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool)
|
||||
co_text_all = [(i/6).astype(int) for i in co_text_all]
|
||||
# fill polygons in lower resolution to be faster
|
||||
height, width = text_regions_p.shape
|
||||
labels_con = np.zeros((height // 6, width // 6, len(co_text_all)), dtype=bool)
|
||||
for i in range(len(co_text_all)):
|
||||
img = labels_con[:,:,i].astype(np.uint8)
|
||||
|
||||
#img = cv2.resize(img, (int(img.shape[1]/6), int(img.shape[0]/6)), interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
cv2.fillPoly(img, pts=[co_text_all[i]], color=(1,))
|
||||
labels_con[:,:,i] = img
|
||||
|
||||
|
||||
img = np.zeros(labels_con.shape[:2], dtype=np.uint8)
|
||||
cv2.fillPoly(img, pts=[co_text_all[i] // 6], color=1)
|
||||
labels_con[:, :, i] = img
|
||||
labels_con = resize_image(labels_con.astype(np.uint8), height1, width1).astype(bool)
|
||||
img_header_and_sep = resize_image(img_header_and_sep, height1, width1)
|
||||
img_poly = resize_image(img_poly, height3, width3)
|
||||
|
||||
|
||||
img_poly = resize_image(img_poly, height1, width1)
|
||||
labels_con[img_poly == label_seps_ro] = 2
|
||||
labels_con[img_header_and_sep == 1] = 3
|
||||
labels_con = labels_con / 3.
|
||||
img_poly = img_poly / 5.
|
||||
|
||||
inference_bs = 1 # 3 (causes OOM on 8 GB GPUs)
|
||||
input_1 = np.zeros((inference_bs, height1, width1, 3))
|
||||
ordered = [list(range(len(co_text_all)))]
|
||||
index_update = 0
|
||||
|
|
@ -2025,23 +1993,16 @@ class Eynollah:
|
|||
tot_counter = 0
|
||||
batch = []
|
||||
for j in ij_list:
|
||||
img1 = labels_con[:,:,i].astype(float)
|
||||
img2 = labels_con[:,:,j].astype(float)
|
||||
img1[img_poly==5] = 2
|
||||
img2[img_poly==5] = 2
|
||||
img1[img_header_and_sep==1] = 3
|
||||
img2[img_header_and_sep==1] = 3
|
||||
|
||||
input_1[len(batch), :, :, 0] = img1 / 3.
|
||||
input_1[len(batch), :, :, 2] = img2 / 3.
|
||||
input_1[len(batch), :, :, 1] = img_poly / 5.
|
||||
input_1[len(batch), :, :, 0] = labels_con[:, :, i]
|
||||
input_1[len(batch), :, :, 1] = img_poly
|
||||
input_1[len(batch), :, :, 2] = labels_con[:, :, j]
|
||||
|
||||
tot_counter += 1
|
||||
batch.append(j)
|
||||
if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
|
||||
y_pr = self.model_zoo.get("reading_order").predict(input_1 , verbose=0)
|
||||
for jb, j in enumerate(batch):
|
||||
if y_pr[jb][0]>=0.5:
|
||||
for post_pr in y_pr:
|
||||
if post_pr[0] >= 0.5:
|
||||
post_list.append(j)
|
||||
else:
|
||||
ante_list.append(j)
|
||||
|
|
@ -2062,20 +2023,20 @@ class Eynollah:
|
|||
|
||||
ordered = [i[0] for i in ordered]
|
||||
|
||||
if len(contours_only_text_parent)>min_cont_size_to_be_dilated:
|
||||
if len(contours_only_text_parent) > min_cont_size_to_be_dilated:
|
||||
org_contours_indexes = []
|
||||
for ind in range(len(ordered)):
|
||||
region_with_curr_order = ordered[ind]
|
||||
if region_with_curr_order < len(contours_only_dilated):
|
||||
if np.isscalar(indexes_of_located_cont[region_with_curr_order]):
|
||||
org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]])
|
||||
for i in ordered:
|
||||
if i < len(contours_only_dilated):
|
||||
if i >= len(cy_of_located):
|
||||
# excluded or missing dilated version of main region
|
||||
org_contours_indexes.extend(indexes_of_located_cont[i])
|
||||
else:
|
||||
arg_sort_located_cont = np.argsort(center_y_coordinates_of_located[region_with_curr_order])
|
||||
org_contours_indexes.extend(
|
||||
np.array(indexes_of_located_cont[region_with_curr_order])[arg_sort_located_cont])
|
||||
# reconstructed dilated version of main region
|
||||
org_contours_indexes.extend(indexes_of_located_cont[i][
|
||||
np.argsort(cy_of_located[i])])
|
||||
else:
|
||||
org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]])
|
||||
|
||||
# header or drop-capital region
|
||||
org_contours_indexes.extend(indexes_of_located_cont[i])
|
||||
return org_contours_indexes
|
||||
else:
|
||||
return ordered
|
||||
|
|
@ -2161,29 +2122,19 @@ class Eynollah:
|
|||
return contours
|
||||
|
||||
def return_indexes_of_contours_located_inside_another_list_of_contours(
|
||||
self, contours, contours_loc, cx_main_loc, cy_main_loc, indexes_loc):
|
||||
indexes_of_located_cont = []
|
||||
center_x_coordinates_of_located = []
|
||||
center_y_coordinates_of_located = []
|
||||
#M_main_tot = [cv2.moments(contours_loc[j])
|
||||
#for j in range(len(contours_loc))]
|
||||
#cx_main_loc = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
|
||||
#cy_main_loc = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))]
|
||||
|
||||
for ij in range(len(contours)):
|
||||
results = [cv2.pointPolygonTest(contours[ij], (cx_main_loc[ind], cy_main_loc[ind]), False)
|
||||
for ind in range(len(cy_main_loc)) ]
|
||||
results = np.array(results)
|
||||
indexes_in = np.where((results == 0) | (results == 1))
|
||||
# [(results == 0) | (results == 1)]#np.where((results == 0) | (results == 1))
|
||||
indexes = indexes_loc[indexes_in]
|
||||
|
||||
indexes_of_located_cont.append(indexes)
|
||||
center_x_coordinates_of_located.append(np.array(cx_main_loc)[indexes_in] )
|
||||
center_y_coordinates_of_located.append(np.array(cy_main_loc)[indexes_in] )
|
||||
|
||||
return indexes_of_located_cont, center_x_coordinates_of_located, center_y_coordinates_of_located
|
||||
self, contours, centersx_loc, centersy_loc, indexes_loc):
|
||||
indexes = []
|
||||
centersx = []
|
||||
centersy = []
|
||||
for contour in contours:
|
||||
results = np.array([cv2.pointPolygonTest(contour, (px, py), False)
|
||||
for px, py in zip(centersx_loc, centersy_loc)])
|
||||
indexes_in = (results == 0) | (results == 1)
|
||||
indexes.append(indexes_loc[indexes_in])
|
||||
centersx.append(centersx_loc[indexes_in])
|
||||
centersy.append(centersy_loc[indexes_in])
|
||||
|
||||
return indexes, centersx, centersy
|
||||
|
||||
def filter_contours_without_textline_inside(
|
||||
self, contours_par, contours_textline,
|
||||
|
|
@ -2945,6 +2896,7 @@ class Eynollah:
|
|||
num_col_classifier, erosion_hurts, self.tables, self.right2left,
|
||||
logger=self.logger)
|
||||
else:
|
||||
polygons_of_drop_capitals = []
|
||||
contours_only_text_parent_h = []
|
||||
contours_only_text_parent_h_d_ordered = []
|
||||
|
||||
|
|
@ -2966,6 +2918,7 @@ class Eynollah:
|
|||
order_text_new = self.do_order_of_regions_with_model(
|
||||
contours_only_text_parent,
|
||||
contours_only_text_parent_h,
|
||||
polygons_of_drop_capitals,
|
||||
text_regions_p)
|
||||
else:
|
||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue