return_boxes_of_images_by_order_of_reading_new: sep label differs w/o -fl…

Fix a bug where, in non-full layout mode, the wrong class label was assumed
for separator regions (label 3 in non-full vs. label 6 in full layout mode):

- pass in separator mask instead of full segmentation map
- rename for clarity:
  - `regions_without_separators` → `text_mask` (already binary)
  - `regions_with_separators` → `sep_mask` (now just binary)
This commit is contained in:
Robert Sachunsky 2026-04-16 05:16:23 +02:00
parent f5f2435a38
commit f29e876a7c
2 changed files with 29 additions and 25 deletions

View file

@ -1719,7 +1719,8 @@ class Eynollah:
t1 = time.time() t1 = time.time()
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new, regions_without_separators, text_regions_p, matrix_of_seps_ch, splitter_y_new, regions_without_separators,
text_regions_p == label_seps, matrix_of_seps_ch,
num_col_classifier, erosion_hurts, self.tables, self.right2left) num_col_classifier, erosion_hurts, self.tables, self.right2left)
boxes_d = None boxes_d = None
self.logger.debug("len(boxes): %s", len(boxes)) self.logger.debug("len(boxes): %s", len(boxes))
@ -1727,7 +1728,8 @@ class Eynollah:
else: else:
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new_d, regions_without_separators_d, text_regions_p_d, matrix_of_seps_ch_d, splitter_y_new_d, regions_without_separators_d,
text_regions_p_d == label_seps, matrix_of_seps_ch_d,
num_col_classifier, erosion_hurts, self.tables, self.right2left) num_col_classifier, erosion_hurts, self.tables, self.right2left)
boxes = None boxes = None
self.logger.debug("len(boxes): %s", len(boxes_d)) self.logger.debug("len(boxes): %s", len(boxes_d))
@ -2896,12 +2898,14 @@ class Eynollah:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
boxes, _ = return_boxes_of_images_by_order_of_reading_new( boxes, _ = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new, regions_without_separators, text_regions_p, matrix_of_seps_ch, splitter_y_new, regions_without_separators,
text_regions_p == label_seps, matrix_of_seps_ch,
num_col_classifier, erosion_hurts, self.tables, self.right2left, num_col_classifier, erosion_hurts, self.tables, self.right2left,
logger=self.logger) logger=self.logger)
else: else:
boxes_d, _ = return_boxes_of_images_by_order_of_reading_new( boxes_d, _ = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new_d, regions_without_separators_d, text_regions_p_d, matrix_of_seps_ch_d, splitter_y_new_d, regions_without_separators_d,
text_regions_p_d == label_seps, matrix_of_seps_ch_d,
num_col_classifier, erosion_hurts, self.tables, self.right2left, num_col_classifier, erosion_hurts, self.tables, self.right2left,
logger=self.logger) logger=self.logger)
else: else:

View file

@ -1542,8 +1542,8 @@ def find_number_of_columns_in_document(
def return_boxes_of_images_by_order_of_reading_new( def return_boxes_of_images_by_order_of_reading_new(
splitter_y_new, splitter_y_new,
regions_without_separators, text_mask,
regions_with_separators, sep_mask,
matrix_of_seps_ch, matrix_of_seps_ch,
num_col_classifier, erosion_hurts, tables, num_col_classifier, erosion_hurts, tables,
right2left_readingorder, right2left_readingorder,
@ -1555,9 +1555,9 @@ def return_boxes_of_images_by_order_of_reading_new(
Arguments: Arguments:
* splitter_y_new: the y coordinates separating the parts * splitter_y_new: the y coordinates separating the parts
* regions_without_separators: (text) region mask with separators suppressed; * text_mask: binary text region mask
(needed to find per-part columns and to combine separators if possible) (needed to find per-part columns and to combine separators if possible)
* regions_with_separators: (full) region map with separators included; * sep_mask: binary separator region mask
(needed to elongate separators if possible) (needed to elongate separators if possible)
* matrix_of_seps: type and coordinates of horizontal and vertical separators, * matrix_of_seps: type and coordinates of horizontal and vertical separators,
as well as headings as well as headings
@ -1574,22 +1574,22 @@ def return_boxes_of_images_by_order_of_reading_new(
""" """
if right2left_readingorder: if right2left_readingorder:
regions_without_separators = cv2.flip(regions_without_separators,1) text_mask = cv2.flip(text_mask,1)
regions_with_separators = cv2.flip(regions_with_separators,1) sep_mask = cv2.flip(sep_mask,1)
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
logger.debug('enter return_boxes_of_images_by_order_of_reading_new') logger.debug('enter return_boxes_of_images_by_order_of_reading_new')
# def dbg_imshow(box, title): # def dbg_imshow(box, title):
# xmin, xmax, ymin, ymax = box # xmin, xmax, ymin, ymax = box
# plt.imshow(regions_with_separators) #, extent=[0, width_tot, bot, top]) # plt.imshow(1 * text_mask + 3 * sep_mask) #, extent=[0, width_tot, bot, top])
# plt.gca().add_patch(patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, # plt.gca().add_patch(patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
# fill=False, linewidth=1, edgecolor='r')) # fill=False, linewidth=1, edgecolor='r'))
# plt.title(title + " at %d:%d, %d:%d" % (ymin, ymax, xmin, xmax)) # plt.title(title + " at %d:%d, %d:%d" % (ymin, ymax, xmin, xmax))
# plt.show() # plt.show()
# def dbg_plt(box=None, title=None, rectangles=None, rectangles_showidx=False): # def dbg_plt(box=None, title=None, rectangles=None, rectangles_showidx=False):
# minx, maxx, miny, maxy = box or (0, None, 0, None) # minx, maxx, miny, maxy = box or (0, None, 0, None)
# img = regions_without_separators[miny:maxy, minx:maxx] # img = text_mask[miny:maxy, minx:maxx]
# plt.imshow(img) # plt.imshow(img)
# step = max(img.shape) // 10 # step = max(img.shape) // 10
# xrange = np.arange(0, img.shape[1], step) # xrange = np.arange(0, img.shape[1], step)
@ -1616,15 +1616,15 @@ def return_boxes_of_images_by_order_of_reading_new(
boxes=[] boxes=[]
peaks_neg_tot_tables = [] peaks_neg_tot_tables = []
splitter_y_new = np.array(splitter_y_new, dtype=int) splitter_y_new = np.array(splitter_y_new, dtype=int)
height_tot, width_tot = regions_without_separators.shape height_tot, width_tot = text_mask.shape
big_part = 22 * height_tot // 100 # percent height big_part = 22 * height_tot // 100 # percent height
_, ccomps, cstats, _ = cv2.connectedComponentsWithStats(regions_without_separators.astype(np.uint8)) _, ccomps, cstats, _ = cv2.connectedComponentsWithStats(text_mask.astype(np.uint8))
args_ver = matrix_of_seps_ch[:, 9] == 1 args_ver = matrix_of_seps_ch[:, 9] == 1
mask_ver = np.zeros_like(regions_without_separators, dtype=bool) mask_ver = np.zeros_like(sep_mask, dtype=bool)
for i in np.flatnonzero(args_ver): for i in np.flatnonzero(args_ver):
mask_ver[matrix_of_seps_ch[i, 6]: matrix_of_seps_ch[i, 7], mask_ver[matrix_of_seps_ch[i, 6]: matrix_of_seps_ch[i, 7],
matrix_of_seps_ch[i, 2]: matrix_of_seps_ch[i, 3]] = True matrix_of_seps_ch[i, 2]: matrix_of_seps_ch[i, 3]] = True
vertical_seps = 1 * ((regions_with_separators == 6) & mask_ver) vertical_seps = 1 * (sep_mask & mask_ver)
for top, bot in pairwise(splitter_y_new): for top, bot in pairwise(splitter_y_new):
# print("%d:%d" % (top, bot), 'i') # print("%d:%d" % (top, bot), 'i')
# dbg_plt([0, None, top, bot], "image cut for y split %d:%d" % (top, bot)) # dbg_plt([0, None, top, bot], "image cut for y split %d:%d" % (top, bot))
@ -1637,7 +1637,7 @@ def return_boxes_of_images_by_order_of_reading_new(
# np.max(matrix_new[:,8][matrix_new[:,9]==1]) >= # np.max(matrix_new[:,8][matrix_new[:,9]==1]) >=
# 0.1 * (np.abs(bot-top))): # 0.1 * (np.abs(bot-top))):
num_col, peaks_neg_fin = find_num_col( num_col, peaks_neg_fin = find_num_col(
regions_without_separators[top:bot], text_mask[top:bot],
# we do not expect to get all columns in small parts (headings etc.): # we do not expect to get all columns in small parts (headings etc.):
num_col_classifier if bot - top >= big_part else 1, num_col_classifier if bot - top >= big_part else 1,
tables, vertical_separators=vertical_seps[top: bot], tables, vertical_separators=vertical_seps[top: bot],
@ -1656,7 +1656,7 @@ def return_boxes_of_images_by_order_of_reading_new(
#print("peaks_neg_fin_org", peaks_neg_fin_org) #print("peaks_neg_fin_org", peaks_neg_fin_org)
if len(peaks_neg_fin) == 0: if len(peaks_neg_fin) == 0:
num_col, peaks_neg_fin = find_num_col( num_col, peaks_neg_fin = find_num_col(
regions_without_separators[top:bot], text_mask[top:bot],
num_col_classifier, tables, num_col_classifier, tables,
vertical_separators=vertical_seps[top: bot], vertical_separators=vertical_seps[top: bot],
# try to be less strict (lower threshold than above) # try to be less strict (lower threshold than above)
@ -1672,12 +1672,12 @@ def return_boxes_of_images_by_order_of_reading_new(
# dbg_plt([left, right, top, bot], # dbg_plt([left, right, top, bot],
# "image cut for y split %d:%d / x gap %d:%d" % ( # "image cut for y split %d:%d / x gap %d:%d" % (
# top, bot, left, right)) # top, bot, left, right))
# plt.plot(regions_without_separators[top:bot, left:right].sum(axis=0)) # plt.plot(text_mask[top:bot, left:right].sum(axis=0))
# plt.title("vertical projection (sum over y)") # plt.title("vertical projection (sum over y)")
# plt.show() # plt.show()
# try to get more peaks with different multipliers # try to get more peaks with different multipliers
num_col_expected = round((right - left) / width_tot * num_col_classifier) num_col_expected = round((right - left) / width_tot * num_col_classifier)
args = regions_without_separators[top:bot, left:right], num_col_expected, tables args = text_mask[top:bot, left:right], num_col_expected, tables
kwargs = dict(vertical_separators=vertical_seps[top: bot, left:right]) kwargs = dict(vertical_separators=vertical_seps[top: bot, left:right])
_, peaks_neg_fin1 = find_num_col(*args, **kwargs, multiplier=7.) _, peaks_neg_fin1 = find_num_col(*args, **kwargs, multiplier=7.)
_, peaks_neg_fin2 = find_num_col(*args, **kwargs, multiplier=5.) _, peaks_neg_fin2 = find_num_col(*args, **kwargs, multiplier=5.)
@ -1708,7 +1708,7 @@ def return_boxes_of_images_by_order_of_reading_new(
except: except:
logger.exception("cannot find peaks consistent with columns") logger.exception("cannot find peaks consistent with columns")
#num_col, peaks_neg_fin = find_num_col( #num_col, peaks_neg_fin = find_num_col(
# regions_without_separators[top:bot,:], # text_mask[top:bot,:],
# multiplier=7.0) # multiplier=7.0)
peaks_neg_tot = np.array([0] + peaks_neg_fin + [width_tot]) peaks_neg_tot = np.array([0] + peaks_neg_fin + [width_tot])
#print(peaks_neg_tot,'peaks_neg_tot') #print(peaks_neg_tot,'peaks_neg_tot')
@ -1721,7 +1721,7 @@ def return_boxes_of_images_by_order_of_reading_new(
args_nonver = matrix_new[:, 9] != 1 args_nonver = matrix_new[:, 9] != 1
for i in np.flatnonzero(args_nonver): for i in np.flatnonzero(args_nonver):
xmin, xmax, ymin, ymax, typ = matrix_new[i, [2, 3, 6, 7, 9]] xmin, xmax, ymin, ymax, typ = matrix_new[i, [2, 3, 6, 7, 9]]
cut = regions_with_separators[ymin: ymax] cut = sep_mask[ymin: ymax]
# dbg_imshow([xmin, xmax, ymin, ymax], "separator %d (%s)" % (i, "heading" if typ else "horizontal")) # dbg_imshow([xmin, xmax, ymin, ymax], "separator %d (%s)" % (i, "heading" if typ else "horizontal"))
starting = xmin - peaks_neg_tot starting = xmin - peaks_neg_tot
min_start = np.flatnonzero(starting >= 0)[-1] # last left-of min_start = np.flatnonzero(starting >= 0)[-1] # last left-of
@ -1819,7 +1819,7 @@ def return_boxes_of_images_by_order_of_reading_new(
x_min_hor_some = width_tot - x_max_hor_some x_min_hor_some = width_tot - x_max_hor_some
x_starting, x_ending, y_min, y_mid, y_max = return_multicol_separators_x_start_end( x_starting, x_ending, y_min, y_mid, y_max = return_multicol_separators_x_start_end(
regions_without_separators, peaks_neg_tot, top, bot, text_mask, peaks_neg_tot, top, bot,
x_min_hor_some, x_max_hor_some, cy_hor_some, y_min_hor_some, y_max_hor_some) x_min_hor_some, x_max_hor_some, cy_hor_some, y_min_hor_some, y_max_hor_some)
# dbg_plt([0, None, top, bot], "non-empty multi-column separators in current split", # dbg_plt([0, None, top, bot], "non-empty multi-column separators in current split",
# list(zip(peaks_neg_tot[x_starting], peaks_neg_tot[x_ending], # list(zip(peaks_neg_tot[x_starting], peaks_neg_tot[x_ending],
@ -1851,7 +1851,7 @@ def return_boxes_of_images_by_order_of_reading_new(
# "box area", (y_bot - y_top) * width, # "box area", (y_bot - y_top) * width,
# "label area", (min(y_bot, l_bot) - max(y_top, l_top)) * width, # "label area", (min(y_bot, l_bot) - max(y_top, l_top)) * width,
# "box height", (y_bot - y_top), # "box height", (y_bot - y_top),
# "label height", sum(regions_without_separators[ # "label height", sum(text_mask[
# y_top: y_bot, peaks_neg_tot[start + 1]])) # y_top: y_bot, peaks_neg_tot[start + 1]]))
return max((last for last, l_top, l_bot, l_count in labelcolmap.get(start, []) return max((last for last, l_top, l_bot, l_count in labelcolmap.get(start, [])
# yield the right-most column that does not cut through # yield the right-most column that does not cut through
@ -1868,7 +1868,7 @@ def return_boxes_of_images_by_order_of_reading_new(
(peaks_neg_tot[last] - peaks_neg_tot[start])) > 0.1 * l_count (peaks_neg_tot[last] - peaks_neg_tot[start])) > 0.1 * l_count
# But do allow cutting tiny passages with less 10% of height # But do allow cutting tiny passages with less 10% of height
# (i.e. label is already almost separated by columns) # (i.e. label is already almost separated by columns)
and sum(regions_without_separators[ and sum(text_mask[
y_top: y_bot, peaks_neg_tot[start + 1]]) > 0.1 * (y_bot - y_top)), y_top: y_bot, peaks_neg_tot[start + 1]]) > 0.1 * (y_bot - y_top)),
# Otherwise advance only 1 column. # Otherwise advance only 1 column.
default=start + 1) default=start + 1)