From 5a27e46b2282c68b2dc156110059c9ec197d9e7f Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 16 Apr 2026 04:56:38 +0200 Subject: [PATCH] =?UTF-8?q?keep=20seps=20over=20artificial=20boundaries=20?= =?UTF-8?q?to=20improve=20col=20separation=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (thresholding and decoding with artificial boundary class can overwrite existing column separators, which in turn can contribute to missing column boundaries; this prioritises seps over boundaries, which does not impair separation of instances, as seps will separate text/image/etc instances just as well as artificial boundaries) --- src/eynollah/eynollah.py | 12 ++++++++---- src/eynollah/utils/__init__.py | 8 +++++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index d564f87..180a12a 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -628,6 +628,7 @@ class Eynollah: thresholding_for_artificial_class=False, threshold_art_class=0.1, artificial_class=4, + separator_class=0, ): self.logger.debug("enter do_prediction_new_concept (patches=%d)", patches) @@ -653,7 +654,8 @@ class Eynollah: label=artificial_class, only=True, skeletonize=True, - dilate=3) + dilate=3, + keep=separator_class) conf_text = resize_image(label_p_pred[:, :, 1], img_h_page, img_w_page) return prediction, conf_text @@ -799,7 +801,8 @@ class Eynollah: label=artificial_class, only=True, skeletonize=True, - dilate=3) + dilate=3, + keep=separator_class) gc.collect() return prediction, confidence @@ -1071,7 +1074,7 @@ class Eynollah: self.logger.debug('enter textline_contours') kwargs = dict(artificial_class=2, - n_batch_inference=3, + n_batch_inference=1 if self.tables else 3, # avoid OOM thresholding_for_artificial_class=True, threshold_art_class=self.threshold_art_class_textline) if use_patches: @@ -1131,7 +1134,8 @@ class Eynollah: #print("inside 2 ", 
time.time()-t_in) kwargs = dict(n_batch_inference=1, thresholding_for_artificial_class=True, - threshold_art_class=self.threshold_art_class_layout) + threshold_art_class=self.threshold_art_class_layout, + separator_class=3) if num_col_classifier == 1 or num_col_classifier == 2: if img_height_h / img_width_h > 2.5: self.logger.debug("resized to %dx%d for %d cols", diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index f8638db..ef27f24 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -1953,7 +1953,8 @@ def seg_mask_label(segmap:np.ndarray, only:bool=False, label:int=2, skeletonize:bool=False, - dilate:int=0 + dilate:int=0, + keep:int=0, ) -> None: """ overwrite an existing segmentation map from a binary mask with a given label @@ -1966,11 +1967,14 @@ def seg_mask_label(segmap:np.ndarray, only: whether to suppress the `label` outside `mask` skeletonize: whether to transform the mask to its skeleton dilate: whether to also apply dilatation after this (convolution with square kernel of given size) + keep: if nonzero, a class label to be kept untouched Use this to enforce specific confidence thresholds or rules after segmentation. """ if not mask.any(): return + if keep: + keepmask = segmap == keep if only: segmap[segmap == label] = 0 if skeletonize: @@ -1982,3 +1986,5 @@ def seg_mask_label(segmap:np.ndarray, kernel = np.ones((dilate, dilate), np.uint8) mask = cv2.dilate(mask.astype(np.uint8), kernel, iterations=1) > 0 segmap[mask] = label + if keep: + segmap[keepmask] = keep