From 6d55d0b87b7c8a14b2834348680ad07fbf05b3f3 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 25 Apr 2026 02:23:16 +0200 Subject: [PATCH] =?UTF-8?q?get=5Fmarginals():=20improve=20peak=20point=20t?= =?UTF-8?q?hreshold=20criterion=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit in search of valid peaks (gaps between text columns), - drop absolute values for minimum gap depth (likely crafted for some fixed resolution examples) - instead, use criterion relative to maximum column depth and page height (trying to loosely approximate the prior constants, albeit somewhat more permissive) --- src/eynollah/utils/marginals.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index 6a93a3b..8344b9a 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -37,16 +37,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0) max_textline_thickness_percent = 100. * text_with_lines_y_eroded.max() / height - - # rs: min_textline_thickness seems to be calibrated for some fixed resolution, - # but text_with_lines varies in size! - if max_textline_thickness_percent < 30: - min_textline_thickness = 8 - elif max_textline_thickness_percent < 50: - min_textline_thickness = 20 - else: - min_textline_thickness = 45 - # min_textline_thickness = max_textline_thickness_percent / 100. * height / 20. + min_textline_thickness = max_textline_thickness_percent / 100. * height / 20. # plt.figure() # ax1 = plt.subplot(2, 1, 1, title="text_with_lines_eroded")