run/get_marginals(): simplify and speed up…

- `get_marginals` modifies region labels in place anyway,
  so no return value is needed
- de/rotate only inside `get_marginals` (for consistency)
- return early if no marginals detected
- `run_marginals`: only useful for 1 or 2 columns, so call it only
  in that conditional branch; this avoids unnecessarily resizing
  images back and forth
- rename `text_regions_p_1` → `text_regions_p`
This commit is contained in:
Robert Sachunsky 2026-04-26 23:51:05 +02:00
parent 45a43f7e5e
commit 1f6db34adf
2 changed files with 28 additions and 45 deletions

View file

@ -1628,22 +1628,14 @@ class Eynollah:
return slope_deskew
def run_marginals(
self, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):
self, num_col_classifier, slope_deskew, text_regions_p, table_prediction):
text_regions_p = np.array(text_regions_p_1)
if num_col_classifier in (1, 2):
try:
regions_without_separators = (text_regions_p == 1) * 1
if self.tables:
regions_without_separators[table_prediction == 1] = 1
regions_without_separators = regions_without_separators.astype(np.uint8)
text_regions_p = get_marginals(
rotate_image(regions_without_separators, slope_deskew), text_regions_p,
num_col_classifier, slope_deskew, kernel=KERNEL)
except Exception as e:
self.logger.error("exception %s", e)
regions_without_separators = (text_regions_p == 1).astype(np.uint8)
if self.tables:
regions_without_separators[table_prediction == 1] = 1
return text_regions_p
get_marginals(regions_without_separators, text_regions_p,
num_col_classifier, slope_deskew, kernel=KERNEL)
def get_full_layout(
self, image_page,
@ -2267,7 +2259,7 @@ class Eynollah:
t1 = time.time()
self.logger.info("Step 2/5: Layout Analysis")
(text_regions_p_1,
(text_regions_p,
erosion_hurts,
polygons_seplines,
polygons_text_early,
@ -2293,9 +2285,9 @@ class Eynollah:
#self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
(num_col, num_col_classifier,
page_coord, image_page, cont_page,
text_regions_p_1, table_prediction, textline_mask_tot_ea,
text_regions_p, table_prediction, textline_mask_tot_ea,
regions_confidence, table_confidence, textline_confidence) = \
self.run_graphics_and_columns(text_regions_p_1, textline_mask_tot_ea,
self.run_graphics_and_columns(text_regions_p, textline_mask_tot_ea,
regions_confidence, textline_confidence,
num_col_classifier, num_column_is_classified,
erosion_hurts, image)
@ -2344,13 +2336,10 @@ class Eynollah:
img_w_new = 2400
img_h_new = img_w_new * textline_mask_tot_ea.shape[0] // textline_mask_tot_ea.shape[1]
image_page = resize_image(image_page, img_h_new, img_w_new)
textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_h_new, img_w_new)
text_regions_p_1 = resize_image(text_regions_p_1, img_h_new, img_w_new)
table_prediction = resize_image(table_prediction, img_h_new, img_w_new)
text_regions_p = \
self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
text_regions_p_new = resize_image(text_regions_p, img_h_new, img_w_new)
table_prediction_new = resize_image(table_prediction, img_h_new, img_w_new)
self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_new, table_prediction_new)
text_regions_p = resize_image(text_regions_p_new, org_h_l_m, org_w_l_m)
if self.plotter:
self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name'])
@ -2378,20 +2367,10 @@ class Eynollah:
if not np.any(text_regions_p == label_text):
text_regions_p[text_regions_p == label_marg] = label_text
t5 = time.time()
self.logger.info("Marginalia extraction took %.1fs", t5 - t4)
self.logger.info("Step 3/5: Text Line Detection")
if self.curved_line:
self.logger.info("Mode: Curved line detection")
if num_col_classifier in (1,2):
image_page = resize_image(image_page, org_h_l_m, org_w_l_m)
textline_mask_tot_ea = resize_image(textline_mask_tot_ea, org_h_l_m, org_w_l_m)
text_regions_p = resize_image(text_regions_p, org_h_l_m, org_w_l_m)
text_regions_p_1 = resize_image(text_regions_p_1, org_h_l_m, org_w_l_m)
table_prediction = resize_image(table_prediction, org_h_l_m, org_w_l_m)
self.logger.info(f"Detection of marginals took {time.time() - t1:.1f}s")
t1 = time.time()
regions_fully, regionsfl_confidence, regions_without_separators = \
self.get_full_layout(image_page,
textline_mask_tot_ea,
@ -2479,6 +2458,8 @@ class Eynollah:
all_found_textline_polygons_marginals = dilate_textline_contours(
all_found_textline_polygons_marginals)
else:
self.logger.info("Mode: Curved line detection")
textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
all_found_textline_polygons, slopes = \
self.get_slopes_and_deskew_new_curved(

View file

@ -6,16 +6,20 @@ from .contour import find_center_of_contours, return_contours_of_interested_regi
from .resize import resize_image
from .rotate import rotate_image
def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None,
def get_marginals(text_mask, early_layout, num_col, slope_deskew,
kernel=None,
label_text=1,
label_marg=4,
):
# rs: text_mask_d is already deskewed, while early_layout is not...
if kernel is None:
kernel = np.ones((5, 5), dtype=np.uint8)
kernel_hor = np.ones((1, 5), dtype=np.uint8)
text_mask_d = rotate_image(text_mask, slope_deskew)
main_mask_d = np.zeros_like(text_mask_d)
height, width = main_mask_d.shape
##text_mask_d=cv2.erode(text_mask_d,self.kernel,iterations=3)
text_mask_d_eroded = cv2.erode(text_mask_d,kernel,iterations=5)
text_mask_d_eroded = cv2.erode(text_mask_d, kernel, iterations=5)
if height <= 1500:
pass
@ -32,7 +36,6 @@ def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None,
# rs: and back to original size
text_mask_d = resize_image(text_mask_d, height, width)
kernel_hor = np.ones((1, 5), dtype=np.uint8)
text_mask_d = cv2.erode(text_mask_d, kernel_hor, iterations=6)
text_mask_d_y = text_mask_d.sum(axis=0)
text_mask_d_y_eroded = text_mask_d_eroded.sum(axis=0)
@ -98,7 +101,7 @@ def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None,
# ax2.scatter(peaks_orig, region_sum_0[peaks_orig], label='peaks')
# plt.legend()
# plt.show()
return early_layout
return
point_right = peaks_right[np.argmax(scores[peaks_right])]
#point_left = first_nonzero
point_left = 0
@ -116,6 +119,8 @@ def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None,
point_right = 0
main_mask_d[:, point_left: point_right] = 1
if not np.any(main_mask_d):
return
# plt.figure()
# ax1 = plt.subplot(2, 2, 1)
@ -167,6 +172,3 @@ def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None,
# plt.imshow(early_layout)
# plt.show()
else:
pass
return early_layout