mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-04-30 19:22:03 +02:00
get_marginals(): improve peak point threshold criterion…
In search of valid peaks (gaps between text columns):
- drop the absolute values for minimum gap depth (likely crafted for some fixed-resolution examples)
- instead, use a criterion relative to maximum column depth and page height (trying to loosely approximate the prior constants, albeit somewhat more permissively)
This commit is contained in:
parent
4bdea39c98
commit
6d55d0b87b
1 changed file with 1 addition and 10 deletions
|
|
@@ -37,16 +37,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0)
|
text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0)
|
||||||
|
|
||||||
max_textline_thickness_percent = 100. * text_with_lines_y_eroded.max() / height
|
max_textline_thickness_percent = 100. * text_with_lines_y_eroded.max() / height
|
||||||
|
min_textline_thickness = max_textline_thickness_percent / 100. * height / 20.
|
||||||
# rs: min_textline_thickness seems to be calibrated for some fixed resolution,
|
|
||||||
# but text_with_lines varies in size!
|
|
||||||
if max_textline_thickness_percent < 30:
|
|
||||||
min_textline_thickness = 8
|
|
||||||
elif max_textline_thickness_percent < 50:
|
|
||||||
min_textline_thickness = 20
|
|
||||||
else:
|
|
||||||
min_textline_thickness = 45
|
|
||||||
# min_textline_thickness = max_textline_thickness_percent / 100. * height / 20.
|
|
||||||
|
|
||||||
# plt.figure()
|
# plt.figure()
|
||||||
# ax1 = plt.subplot(2, 1, 1, title="text_with_lines_eroded")
|
# ax1 = plt.subplot(2, 1, 1, title="text_with_lines_eroded")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue