mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-04-30 19:22:03 +02:00
run_boxes_*: simplify, document class label mappings, start using
identifier constants instead of literals for labels
This commit is contained in:
parent
f29e876a7c
commit
6e0aed35f4
1 changed files with 93 additions and 113 deletions
|
|
@ -1676,33 +1676,36 @@ class Eynollah:
|
||||||
|
|
||||||
def run_boxes_no_full_layout(
|
def run_boxes_no_full_layout(
|
||||||
self, image_page, textline_mask_tot, text_regions_p,
|
self, image_page, textline_mask_tot, text_regions_p,
|
||||||
slope_deskew, num_col_classifier, table_prediction, erosion_hurts):
|
slope_deskew, num_col_classifier, table_prediction, erosion_hurts,
|
||||||
|
label_text=1,
|
||||||
|
label_imgs=2,
|
||||||
|
label_seps=3,
|
||||||
|
label_marg=4,
|
||||||
|
label_tabs=10,
|
||||||
|
):
|
||||||
self.logger.debug('enter run_boxes_no_full_layout')
|
self.logger.debug('enter run_boxes_no_full_layout')
|
||||||
t_0_box = time.time()
|
t_0_box = time.time()
|
||||||
|
regions_without_separators = (text_regions_p == label_text) * 1
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||||
textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
|
textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
|
||||||
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
|
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
|
||||||
table_prediction_n = rotate_image(table_prediction, slope_deskew)
|
regions_without_separators_d = (text_regions_p_d == label_text) * 1
|
||||||
regions_without_separators_d = (text_regions_p_d == 1) * 1
|
|
||||||
if self.tables:
|
if self.tables:
|
||||||
regions_without_separators_d[table_prediction_n == 1] = 1
|
table_prediction_d = rotate_image(table_prediction, slope_deskew)
|
||||||
regions_without_separators = (text_regions_p == 1) * 1
|
text_regions_p_d[table_prediction_d == 1] = label_tabs
|
||||||
# ( (text_regions_p==1) | (text_regions_p==2) )*1
|
regions_without_separators_d[table_prediction_d == 1] = 1
|
||||||
#self.return_regions_without_separators_new(text_regions_p,img_only_regions)
|
else:
|
||||||
#print(time.time()-t_0_box,'time box in 1')
|
|
||||||
if self.tables:
|
|
||||||
regions_without_separators[table_prediction ==1 ] = 1
|
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
|
||||||
text_regions_p_d = None
|
|
||||||
textline_mask_tot_d = None
|
textline_mask_tot_d = None
|
||||||
|
text_regions_p_d = None
|
||||||
regions_without_separators_d = None
|
regions_without_separators_d = None
|
||||||
label_seps = 3
|
if self.tables:
|
||||||
|
text_regions_p[table_prediction == 1] = label_tabs
|
||||||
|
regions_without_separators[table_prediction == 1] = 1
|
||||||
|
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
_, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
|
_, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
|
||||||
text_regions_p, num_col_classifier, self.tables, label_seps)
|
text_regions_p, num_col_classifier, self.tables, label_seps)
|
||||||
|
else:
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
|
||||||
_, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
|
_, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
|
||||||
text_regions_p_d, num_col_classifier, self.tables, label_seps)
|
text_regions_p_d, num_col_classifier, self.tables, label_seps)
|
||||||
#print(time.time()-t_0_box,'time box in 2')
|
#print(time.time()-t_0_box,'time box in 2')
|
||||||
|
|
@ -1737,70 +1740,40 @@ class Eynollah:
|
||||||
#print(time.time()-t_0_box,'time box in 4')
|
#print(time.time()-t_0_box,'time box in 4')
|
||||||
self.logger.info("detecting boxes took %.1fs", time.time() - t1)
|
self.logger.info("detecting boxes took %.1fs", time.time() - t1)
|
||||||
|
|
||||||
if self.tables:
|
|
||||||
text_regions_p[table_prediction == 1] = 10
|
|
||||||
img_revised_tab = text_regions_p[:, :]
|
|
||||||
polygons_of_images = return_contours_of_interested_region(text_regions_p, 2)
|
|
||||||
|
|
||||||
label_marginalia = 4
|
|
||||||
min_area_mar = 0.00001
|
min_area_mar = 0.00001
|
||||||
marginal_mask = (text_regions_p==label_marginalia)*1
|
polygons_of_tables = return_contours_of_interested_region(text_regions_p, label_tabs, min_area_mar)
|
||||||
marginal_mask = marginal_mask.astype('uint8')
|
|
||||||
marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
|
|
||||||
|
|
||||||
|
polygons_of_images = return_contours_of_interested_region(text_regions_p, label_imgs)
|
||||||
|
|
||||||
|
marginal_mask = (text_regions_p == label_marg).astype(np.uint8)
|
||||||
|
marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
|
||||||
polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
|
polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
|
||||||
|
|
||||||
label_tables = 10
|
|
||||||
contours_tables = return_contours_of_interested_region(text_regions_p, label_tables, min_area_mar)
|
|
||||||
#print(time.time()-t_0_box,'time box in 5')
|
#print(time.time()-t_0_box,'time box in 5')
|
||||||
self.logger.debug('exit run_boxes_no_full_layout')
|
self.logger.debug('exit run_boxes_no_full_layout')
|
||||||
return (polygons_of_images, img_revised_tab, text_regions_p_d, textline_mask_tot_d,
|
return (polygons_of_images, text_regions_p_d, textline_mask_tot_d,
|
||||||
regions_without_separators_d, boxes, boxes_d,
|
regions_without_separators_d, boxes, boxes_d,
|
||||||
polygons_of_marginals, contours_tables)
|
polygons_of_marginals, polygons_of_tables)
|
||||||
|
|
||||||
def run_boxes_full_layout(
|
def run_boxes_full_layout(
|
||||||
self, image_page, textline_mask_tot, text_regions_p,
|
self, image_page, textline_mask_tot, text_regions_p,
|
||||||
slope_deskew, num_col_classifier, img_only_regions,
|
slope_deskew, num_col_classifier, img_only_regions,
|
||||||
table_prediction, erosion_hurts):
|
table_prediction, erosion_hurts,
|
||||||
|
label_text=1,
|
||||||
|
label_imgs=2,
|
||||||
|
label_imgs_fl=5,
|
||||||
|
label_imgs_fl_model=4,
|
||||||
|
label_seps=3,
|
||||||
|
label_seps_fl=6,
|
||||||
|
label_seps_fl_model=5,
|
||||||
|
label_marg=4,
|
||||||
|
label_marg_fl=8,
|
||||||
|
label_drop_fl=4,
|
||||||
|
label_drop_fl_model=3,
|
||||||
|
label_tabs=10,
|
||||||
|
):
|
||||||
self.logger.debug('enter run_boxes_full_layout')
|
self.logger.debug('enter run_boxes_full_layout')
|
||||||
t_full0 = time.time()
|
t_full0 = time.time()
|
||||||
if self.tables:
|
|
||||||
text_regions_p[:,:][table_prediction[:,:]==1] = 10
|
|
||||||
img_revised_tab = text_regions_p[:,:]
|
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
|
||||||
textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
|
|
||||||
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
|
|
||||||
table_prediction_n = rotate_image(table_prediction, slope_deskew)
|
|
||||||
regions_without_separators_d = (text_regions_p_d[:,:] == 1)*1
|
|
||||||
regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
|
|
||||||
else:
|
|
||||||
text_regions_p_d = None
|
|
||||||
textline_mask_tot_d = None
|
|
||||||
regions_without_separators_d = None
|
|
||||||
# regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1
|
|
||||||
#self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
|
|
||||||
regions_without_separators = (text_regions_p[:,:] == 1)*1
|
|
||||||
regions_without_separators[table_prediction == 1] = 1
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
label_marginalia = 4
|
|
||||||
min_area_mar = 0.00001
|
|
||||||
|
|
||||||
marginal_mask = (text_regions_p[:,:]==label_marginalia)*1
|
|
||||||
marginal_mask = marginal_mask.astype('uint8')
|
|
||||||
marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
|
|
||||||
|
|
||||||
polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
|
|
||||||
|
|
||||||
label_tables = 10
|
|
||||||
contours_tables = return_contours_of_interested_region(text_regions_p, label_tables, min_area_mar)
|
|
||||||
|
|
||||||
# set first model with second model
|
|
||||||
text_regions_p[:, :][text_regions_p[:, :] == 2] = 5
|
|
||||||
text_regions_p[:, :][text_regions_p[:, :] == 3] = 6
|
|
||||||
text_regions_p[:, :][text_regions_p[:, :] == 4] = 8
|
|
||||||
|
|
||||||
image_page = image_page.astype(np.uint8)
|
image_page = image_page.astype(np.uint8)
|
||||||
#print("full inside 1", time.time()- t_full0)
|
#print("full inside 1", time.time()- t_full0)
|
||||||
|
|
@ -1808,60 +1781,67 @@ class Eynollah:
|
||||||
image_page,
|
image_page,
|
||||||
False, cols=num_col_classifier)
|
False, cols=num_col_classifier)
|
||||||
#print("full inside 2", time.time()- t_full0)
|
#print("full inside 2", time.time()- t_full0)
|
||||||
# 6 is the separators lable in old full layout model
|
|
||||||
# 4 is the drop capital class in old full layout model
|
# segment labels used by models/arrays:
|
||||||
# in the new full layout drop capital is 3 and separators are 5
|
# class | early | old full (and decoded here) | new full (just predicted) | comment
|
||||||
|
# ---
|
||||||
|
# para | 1 | 1 | 1 |
|
||||||
|
# head | - | 2 | 2 | used in split_textregion_main_vs_head()
|
||||||
|
# drop | - | 4 | 3 | assigned from full model below
|
||||||
|
# img | 2 | 5 | 4 | mapped below
|
||||||
|
# sep | 3 | 6 | 5 | mapped + assigned from full model below
|
||||||
|
# marg | 4 | 8 | - | rule-based in run_marginals() from early text
|
||||||
|
# tab | - | 10 | - | dedicated model, optional
|
||||||
|
text_regions_p[text_regions_p == label_imgs] = label_imgs_fl
|
||||||
|
text_regions_p[text_regions_p == label_seps] = label_seps_fl
|
||||||
|
text_regions_p[text_regions_p == label_marg] = label_marg_fl
|
||||||
|
|
||||||
# the separators in full layout will not be written on layout
|
# the separators in full layout will not be written on layout
|
||||||
if not self.reading_order_machine_based:
|
if not self.reading_order_machine_based:
|
||||||
text_regions_p[regions_fully==5]=6
|
text_regions_p[regions_fully == label_seps_fl_model] = label_seps_fl
|
||||||
|
|
||||||
#text_regions_p[:,:][regions_fully[:,:]==6]=6
|
|
||||||
drop_capital_label_in_full_layout_model = 3
|
|
||||||
|
|
||||||
drops = regions_fully == drop_capital_label_in_full_layout_model
|
|
||||||
regions_fully[drops] = 1
|
|
||||||
|
|
||||||
|
drops = regions_fully == label_drop_fl_model
|
||||||
|
regions_fully[drops] = label_text
|
||||||
|
# rs: why erode to text here, when putt_bb... will mask out text (only allowing img/drop/bg)?
|
||||||
drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1
|
drops = cv2.erode(drops.astype(np.uint8), KERNEL, iterations=1) == 1
|
||||||
regions_fully[drops] = drop_capital_label_in_full_layout_model
|
regions_fully[drops] = label_drop_fl_model
|
||||||
|
|
||||||
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(
|
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(
|
||||||
regions_fully, drop_capital_label_in_full_layout_model, text_regions_p)
|
regions_fully, label_drop_fl_model, text_regions_p)
|
||||||
##regions_fully_np = self.extract_text_regions(image_page, False, cols=num_col_classifier)
|
text_regions_p[regions_fully == label_drop_fl_model] = label_drop_fl
|
||||||
##if num_col_classifier > 2:
|
|
||||||
##regions_fully_np[regions_fully_np == 4] = 0
|
|
||||||
##else:
|
|
||||||
##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)
|
|
||||||
|
|
||||||
###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully,
|
regions_without_separators = (text_regions_p == label_text) * 1
|
||||||
### regions_fully_np, img_only_regions)
|
# regions_without_separators = ( text_regions_p == 1 | text_regions_p == 2 ) * 1
|
||||||
# plt.imshow(regions_fully)
|
#self.return_regions_without_separators_new(text_regions_p, img_only_regions)
|
||||||
# plt.show()
|
|
||||||
text_regions_p[regions_fully == drop_capital_label_in_full_layout_model] = 4
|
|
||||||
####text_regions_p[regions_fully_np == 4] = 4
|
|
||||||
#plt.imshow(text_regions_p)
|
|
||||||
#plt.show()
|
|
||||||
####if not self.tables:
|
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||||
textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
|
textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
|
||||||
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
|
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
|
||||||
regions_fully_n = rotate_image(regions_fully, slope_deskew)
|
regions_without_separators_d = (text_regions_p_d == label_text) * 1
|
||||||
if not self.tables:
|
if self.tables:
|
||||||
regions_without_separators_d = (text_regions_p_d == 1) * 1
|
table_prediction_d = rotate_image(table_prediction, slope_deskew)
|
||||||
|
text_regions_p_d[table_prediction_d == 1] = label_tabs
|
||||||
|
regions_without_separators_d[table_prediction_d == 1] = 1
|
||||||
else:
|
else:
|
||||||
text_regions_p_d = None
|
|
||||||
textline_mask_tot_d = None
|
textline_mask_tot_d = None
|
||||||
|
text_regions_p_d = None
|
||||||
regions_without_separators_d = None
|
regions_without_separators_d = None
|
||||||
if not self.tables:
|
if self.tables:
|
||||||
regions_without_separators = (text_regions_p == 1) * 1
|
text_regions_p[table_prediction == 1] = label_tabs
|
||||||
img_revised_tab = np.copy(text_regions_p)
|
regions_without_separators[table_prediction == 1] = 1
|
||||||
polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5)
|
|
||||||
|
min_area_mar = 0.00001
|
||||||
|
marginal_mask = (text_regions_p == label_marg_fl).astype(np.uint8)
|
||||||
|
marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
|
||||||
|
polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
|
||||||
|
|
||||||
|
polygons_of_tables = return_contours_of_interested_region(text_regions_p, label_tabs, min_area_mar)
|
||||||
|
|
||||||
|
polygons_of_images = return_contours_of_interested_region(text_regions_p, label_imgs_fl)
|
||||||
|
|
||||||
self.logger.debug('exit run_boxes_full_layout')
|
self.logger.debug('exit run_boxes_full_layout')
|
||||||
#print("full inside 3", time.time()- t_full0)
|
#print("full inside 3", time.time()- t_full0)
|
||||||
return (polygons_of_images, img_revised_tab, text_regions_p_d, textline_mask_tot_d,
|
return (polygons_of_images, text_regions_p_d, textline_mask_tot_d,
|
||||||
regions_without_separators_d, regions_fully, regions_without_separators,
|
regions_without_separators_d, regions_fully, regions_without_separators,
|
||||||
polygons_of_marginals, contours_tables)
|
polygons_of_marginals, polygons_of_tables)
|
||||||
|
|
||||||
def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p):
|
def do_order_of_regions_with_model(self, contours_only_text_parent, contours_only_text_parent_h, text_regions_p):
|
||||||
|
|
||||||
|
|
@ -2556,26 +2536,26 @@ class Eynollah:
|
||||||
## birdan sora chock chakir
|
## birdan sora chock chakir
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if not self.full_layout:
|
if not self.full_layout:
|
||||||
polygons_of_images, img_revised_tab, text_regions_p_d, \
|
polygons_of_images, text_regions_p_d, \
|
||||||
textline_mask_tot_ea_d, regions_without_separators_d, \
|
textline_mask_tot_ea_d, regions_without_separators_d, \
|
||||||
boxes, boxes_d, polygons_of_marginals, contours_tables = \
|
boxes, boxes_d, polygons_of_marginals, contours_tables = \
|
||||||
self.run_boxes_no_full_layout(image_page, textline_mask_tot_ea, text_regions_p, slope_deskew,
|
self.run_boxes_no_full_layout(image_page, textline_mask_tot_ea, text_regions_p, slope_deskew,
|
||||||
num_col_classifier, table_prediction, erosion_hurts)
|
num_col_classifier, table_prediction, erosion_hurts)
|
||||||
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
|
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
|
||||||
else:
|
else:
|
||||||
polygons_of_images, img_revised_tab, text_regions_p_d, \
|
polygons_of_images, text_regions_p_d, \
|
||||||
textline_mask_tot_ea_d, regions_without_separators_d, \
|
textline_mask_tot_ea_d, regions_without_separators_d, \
|
||||||
regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
|
regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
|
||||||
self.run_boxes_full_layout(image_page, textline_mask_tot_ea, text_regions_p, slope_deskew,
|
self.run_boxes_full_layout(image_page, textline_mask_tot_ea, text_regions_p, slope_deskew,
|
||||||
num_col_classifier, img_only_regions, table_prediction, erosion_hurts)
|
num_col_classifier, img_only_regions, table_prediction, erosion_hurts)
|
||||||
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
|
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
|
||||||
drop_label_in_full_layout = 4
|
# suppress drop capitals for deskewing (but keep for reading order)
|
||||||
textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0
|
label_drop = 4
|
||||||
|
textline_mask_tot_ea_org[text_regions_p == label_drop] = 0
|
||||||
|
|
||||||
|
text_only = (text_regions_p == 1) * 1
|
||||||
text_only = (img_revised_tab == 1) * 1
|
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||||
text_only_d = ((text_regions_p_d[:, :] == 1)) * 1
|
text_only_d = (text_regions_p_d == 1) * 1
|
||||||
|
|
||||||
#print("text region early 2 in %.1fs", time.time() - t0)
|
#print("text region early 2 in %.1fs", time.time() - t0)
|
||||||
###min_con_area = 0.000005
|
###min_con_area = 0.000005
|
||||||
|
|
@ -2863,8 +2843,8 @@ class Eynollah:
|
||||||
self.plotter.save_plot_of_layout(text_regions_p, image_page, image['name'])
|
self.plotter.save_plot_of_layout(text_regions_p, image_page, image['name'])
|
||||||
self.plotter.save_plot_of_layout_all(text_regions_p, image_page, image['name'])
|
self.plotter.save_plot_of_layout_all(text_regions_p, image_page, image['name'])
|
||||||
|
|
||||||
label_img = 4
|
label_drop = 4
|
||||||
polygons_of_drop_capitals = return_contours_of_interested_region(text_regions_p, label_img,
|
polygons_of_drop_capitals = return_contours_of_interested_region(text_regions_p, label_drop,
|
||||||
min_area=0.00003)
|
min_area=0.00003)
|
||||||
##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
|
##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
|
||||||
##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
|
##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue