mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-04-30 19:22:03 +02:00
keep seps over artificial boundaries to improve col separation…
(Thresholding and decoding with the artificial boundary class can overwrite existing column separators, which in turn can contribute to missing column boundaries. This change prioritises separators over artificial boundaries; doing so does not impair the separation of instances, since separators split text/image/etc. instances just as well as artificial boundaries do.)
This commit is contained in:
parent
9d6ff65e1d
commit
5a27e46b22
2 changed files with 15 additions and 5 deletions
|
|
@ -628,6 +628,7 @@ class Eynollah:
|
|||
thresholding_for_artificial_class=False,
|
||||
threshold_art_class=0.1,
|
||||
artificial_class=4,
|
||||
separator_class=0,
|
||||
):
|
||||
|
||||
self.logger.debug("enter do_prediction_new_concept (patches=%d)", patches)
|
||||
|
|
@ -653,7 +654,8 @@ class Eynollah:
|
|||
label=artificial_class,
|
||||
only=True,
|
||||
skeletonize=True,
|
||||
dilate=3)
|
||||
dilate=3,
|
||||
keep=separator_class)
|
||||
|
||||
conf_text = resize_image(label_p_pred[:, :, 1], img_h_page, img_w_page)
|
||||
return prediction, conf_text
|
||||
|
|
@ -799,7 +801,8 @@ class Eynollah:
|
|||
label=artificial_class,
|
||||
only=True,
|
||||
skeletonize=True,
|
||||
dilate=3)
|
||||
dilate=3,
|
||||
keep=separator_class)
|
||||
gc.collect()
|
||||
return prediction, confidence
|
||||
|
||||
|
|
@ -1071,7 +1074,7 @@ class Eynollah:
|
|||
self.logger.debug('enter textline_contours')
|
||||
|
||||
kwargs = dict(artificial_class=2,
|
||||
n_batch_inference=3,
|
||||
n_batch_inference=1 if self.tables else 3, # avoid OOM
|
||||
thresholding_for_artificial_class=True,
|
||||
threshold_art_class=self.threshold_art_class_textline)
|
||||
if use_patches:
|
||||
|
|
@ -1131,7 +1134,8 @@ class Eynollah:
|
|||
#print("inside 2 ", time.time()-t_in)
|
||||
kwargs = dict(n_batch_inference=1,
|
||||
thresholding_for_artificial_class=True,
|
||||
threshold_art_class=self.threshold_art_class_layout)
|
||||
threshold_art_class=self.threshold_art_class_layout,
|
||||
separator_class=3)
|
||||
if num_col_classifier == 1 or num_col_classifier == 2:
|
||||
if img_height_h / img_width_h > 2.5:
|
||||
self.logger.debug("resized to %dx%d for %d cols",
|
||||
|
|
|
|||
|
|
@ -1953,7 +1953,8 @@ def seg_mask_label(segmap:np.ndarray,
|
|||
only:bool=False,
|
||||
label:int=2,
|
||||
skeletonize:bool=False,
|
||||
dilate:int=0
|
||||
dilate:int=0,
|
||||
keep:int=0,
|
||||
) -> None:
|
||||
"""
|
||||
overwrite an existing segmentation map from a binary mask with a given label
|
||||
|
|
@ -1966,11 +1967,14 @@ def seg_mask_label(segmap:np.ndarray,
|
|||
only: whether to suppress the `label` outside `mask`
|
||||
skeletonize: whether to transform the mask to its skeleton
|
||||
dilate: whether to also apply dilatation after this (convolution with square kernel of given size)
|
||||
keep: if nonzero, a class label to be kept untouched
|
||||
|
||||
Use this to enforce specific confidence thresholds or rules after segmentation.
|
||||
"""
|
||||
if not mask.any():
|
||||
return
|
||||
if keep:
|
||||
keepmask = segmap == keep
|
||||
if only:
|
||||
segmap[segmap == label] = 0
|
||||
if skeletonize:
|
||||
|
|
@ -1982,3 +1986,5 @@ def seg_mask_label(segmap:np.ndarray,
|
|||
kernel = np.ones((dilate, dilate), np.uint8)
|
||||
mask = cv2.dilate(mask.astype(np.uint8), kernel, iterations=1) > 0
|
||||
segmap[mask] = label
|
||||
if keep:
|
||||
segmap[keepmask] = keep
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue