mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-05-01 03:32:00 +02:00
run_marginals: drop unnecessarily passing textline_mask, mask_seps, mask_images
This commit is contained in:
parent
9309586712
commit
f5f2435a38
1 changed file with 17 additions and 25 deletions
|
|
@ -1586,7 +1586,8 @@ class Eynollah:
|
||||||
|
|
||||||
mask_images = (text_regions_p_1 == 2).astype(np.uint8)
|
mask_images = (text_regions_p_1 == 2).astype(np.uint8)
|
||||||
mask_images = cv2.erode(mask_images, KERNEL, iterations=10)
|
mask_images = cv2.erode(mask_images, KERNEL, iterations=10)
|
||||||
mask_seps = (text_regions_p_1 == 3).astype(np.uint8)
|
textline_mask_tot_ea[mask_images == 1] = 0
|
||||||
|
|
||||||
img_only_regions_with_sep = ((text_regions_p_1 != 3) &
|
img_only_regions_with_sep = ((text_regions_p_1 != 3) &
|
||||||
(text_regions_p_1 != 0)).astype(np.uint8)
|
(text_regions_p_1 != 0)).astype(np.uint8)
|
||||||
|
|
||||||
|
|
@ -1611,7 +1612,7 @@ class Eynollah:
|
||||||
except Exception as why:
|
except Exception as why:
|
||||||
self.logger.exception(why)
|
self.logger.exception(why)
|
||||||
num_col = None
|
num_col = None
|
||||||
return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps,
|
return (num_col, num_col_classifier, img_only_regions, page_coord, image_page,
|
||||||
text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea)
|
text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea)
|
||||||
|
|
||||||
def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, image):
|
def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, image):
|
||||||
|
|
@ -1656,18 +1657,12 @@ class Eynollah:
|
||||||
return slope_deskew
|
return slope_deskew
|
||||||
|
|
||||||
def run_marginals(
|
def run_marginals(
|
||||||
self, textline_mask_tot_ea, mask_images, mask_seps,
|
self, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):
|
||||||
num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):
|
|
||||||
|
|
||||||
textline_mask_tot = textline_mask_tot_ea[:, :]
|
text_regions_p = np.array(text_regions_p_1)
|
||||||
textline_mask_tot[mask_images[:, :] == 1] = 0
|
|
||||||
|
|
||||||
text_regions_p_1[mask_seps[:, :] == 1] = 3
|
|
||||||
text_regions_p = text_regions_p_1[:, :]
|
|
||||||
text_regions_p = np.array(text_regions_p)
|
|
||||||
if num_col_classifier in (1, 2):
|
if num_col_classifier in (1, 2):
|
||||||
try:
|
try:
|
||||||
regions_without_separators = (text_regions_p[:, :] == 1) * 1
|
regions_without_separators = (text_regions_p == 1) * 1
|
||||||
if self.tables:
|
if self.tables:
|
||||||
regions_without_separators[table_prediction==1] = 1
|
regions_without_separators[table_prediction==1] = 1
|
||||||
regions_without_separators = regions_without_separators.astype(np.uint8)
|
regions_without_separators = regions_without_separators.astype(np.uint8)
|
||||||
|
|
@ -1677,7 +1672,7 @@ class Eynollah:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error("exception %s", e)
|
self.logger.error("exception %s", e)
|
||||||
|
|
||||||
return textline_mask_tot, text_regions_p
|
return text_regions_p
|
||||||
|
|
||||||
def run_boxes_no_full_layout(
|
def run_boxes_no_full_layout(
|
||||||
self, image_page, textline_mask_tot, text_regions_p,
|
self, image_page, textline_mask_tot, text_regions_p,
|
||||||
|
|
@ -2468,7 +2463,7 @@ class Eynollah:
|
||||||
self.plotter.save_deskewed_image(slope_deskew, image['img'], image['name'])
|
self.plotter.save_deskewed_image(slope_deskew, image['img'], image['name'])
|
||||||
#print("text region early -2,5 in %.1fs", time.time() - t0)
|
#print("text region early -2,5 in %.1fs", time.time() - t0)
|
||||||
#self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
|
#self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
|
||||||
num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps, \
|
num_col, num_col_classifier, img_only_regions, page_coord, image_page, \
|
||||||
text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \
|
text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \
|
||||||
self.run_graphics_and_columns(text_regions_p_1, textline_mask_tot_ea,
|
self.run_graphics_and_columns(text_regions_p_1, textline_mask_tot_ea,
|
||||||
num_col_classifier, num_column_is_classified,
|
num_col_classifier, num_column_is_classified,
|
||||||
|
|
@ -2521,14 +2516,12 @@ class Eynollah:
|
||||||
|
|
||||||
image_page = resize_image(image_page,img_h_new, img_w_new )
|
image_page = resize_image(image_page,img_h_new, img_w_new )
|
||||||
textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
|
textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
|
||||||
mask_images = resize_image(mask_images,img_h_new, img_w_new )
|
|
||||||
mask_seps = resize_image(mask_seps, img_h_new, img_w_new)
|
|
||||||
text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new )
|
text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new )
|
||||||
table_prediction = resize_image(table_prediction,img_h_new, img_w_new )
|
table_prediction = resize_image(table_prediction,img_h_new, img_w_new )
|
||||||
|
|
||||||
textline_mask_tot, text_regions_p = \
|
text_regions_p = \
|
||||||
self.run_marginals(textline_mask_tot_ea, mask_images, mask_seps,
|
self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
|
||||||
num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
|
|
||||||
if self.plotter:
|
if self.plotter:
|
||||||
self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name'])
|
self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name'])
|
||||||
self.plotter.save_plot_of_layout_main(text_regions_p, image_page, image['name'])
|
self.plotter.save_plot_of_layout_main(text_regions_p, image_page, image['name'])
|
||||||
|
|
@ -2554,7 +2547,6 @@ class Eynollah:
|
||||||
image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
|
image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
|
||||||
textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m )
|
textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m )
|
||||||
text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m )
|
text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m )
|
||||||
textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m )
|
|
||||||
text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m )
|
text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m )
|
||||||
table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
|
table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
|
||||||
|
|
||||||
|
|
@ -2563,16 +2555,16 @@ class Eynollah:
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if not self.full_layout:
|
if not self.full_layout:
|
||||||
polygons_of_images, img_revised_tab, text_regions_p_d, \
|
polygons_of_images, img_revised_tab, text_regions_p_d, \
|
||||||
textline_mask_tot_d, regions_without_separators_d, \
|
textline_mask_tot_ea_d, regions_without_separators_d, \
|
||||||
boxes, boxes_d, polygons_of_marginals, contours_tables = \
|
boxes, boxes_d, polygons_of_marginals, contours_tables = \
|
||||||
self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
|
self.run_boxes_no_full_layout(image_page, textline_mask_tot_ea, text_regions_p, slope_deskew,
|
||||||
num_col_classifier, table_prediction, erosion_hurts)
|
num_col_classifier, table_prediction, erosion_hurts)
|
||||||
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
|
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
|
||||||
else:
|
else:
|
||||||
polygons_of_images, img_revised_tab, text_regions_p_d, \
|
polygons_of_images, img_revised_tab, text_regions_p_d, \
|
||||||
textline_mask_tot_d, regions_without_separators_d, \
|
textline_mask_tot_ea_d, regions_without_separators_d, \
|
||||||
regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
|
regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
|
||||||
self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
|
self.run_boxes_full_layout(image_page, textline_mask_tot_ea, text_regions_p, slope_deskew,
|
||||||
num_col_classifier, img_only_regions, table_prediction, erosion_hurts)
|
num_col_classifier, img_only_regions, table_prediction, erosion_hurts)
|
||||||
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
|
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
|
||||||
drop_label_in_full_layout = 4
|
drop_label_in_full_layout = 4
|
||||||
|
|
@ -2936,11 +2928,11 @@ class Eynollah:
|
||||||
else:
|
else:
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
order_text_new = self.do_order_of_regions(
|
order_text_new = self.do_order_of_regions(
|
||||||
contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
|
contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot_ea)
|
||||||
else:
|
else:
|
||||||
order_text_new = self.do_order_of_regions(
|
order_text_new = self.do_order_of_regions(
|
||||||
contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered,
|
contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered,
|
||||||
boxes_d, textline_mask_tot_d)
|
boxes_d, textline_mask_tot_ea_d)
|
||||||
self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
|
self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
|
||||||
|
|
||||||
self.logger.info("Step 5/5: Output Generation")
|
self.logger.info("Step 5/5: Output Generation")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue