|
|
@ -2909,7 +2909,7 @@ class Eynollah:
|
|
|
|
contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area]
|
|
|
|
contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area]
|
|
|
|
areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area]
|
|
|
|
areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area]
|
|
|
|
index_con_parents = np.argsort(areas_cnt_text_parent)
|
|
|
|
index_con_parents = np.argsort(areas_cnt_text_parent)
|
|
|
|
contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents])
|
|
|
|
contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents])
|
|
|
|
areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents])
|
|
|
|
areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents])
|
|
|
|
|
|
|
|
|
|
|
|
cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest])
|
|
|
|
cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest])
|
|
|
@ -2924,7 +2924,7 @@ class Eynollah:
|
|
|
|
if len(areas_cnt_text_d)>0:
|
|
|
|
if len(areas_cnt_text_d)>0:
|
|
|
|
contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)]
|
|
|
|
contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)]
|
|
|
|
index_con_parents_d = np.argsort(areas_cnt_text_d)
|
|
|
|
index_con_parents_d = np.argsort(areas_cnt_text_d)
|
|
|
|
contours_only_text_parent_d = list(np.array(contours_only_text_parent_d)[index_con_parents_d])
|
|
|
|
contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d])
|
|
|
|
areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d])
|
|
|
|
areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d])
|
|
|
|
|
|
|
|
|
|
|
|
cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d])
|
|
|
|
cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d])
|
|
|
@ -2987,7 +2987,7 @@ class Eynollah:
|
|
|
|
areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area]
|
|
|
|
areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area]
|
|
|
|
|
|
|
|
|
|
|
|
index_con_parents = np.argsort(areas_cnt_text_parent)
|
|
|
|
index_con_parents = np.argsort(areas_cnt_text_parent)
|
|
|
|
contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents])
|
|
|
|
contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents])
|
|
|
|
areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents])
|
|
|
|
areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents])
|
|
|
|
|
|
|
|
|
|
|
|
cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest])
|
|
|
|
cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest])
|
|
|
@ -3026,7 +3026,7 @@ class Eynollah:
|
|
|
|
|
|
|
|
|
|
|
|
if self.full_layout:
|
|
|
|
if self.full_layout:
|
|
|
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
|
|
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
|
|
|
contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con])
|
|
|
|
contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
|
|
|
|
if self.light_version:
|
|
|
|
if self.light_version:
|
|
|
|
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
|
|
|
|
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
@ -3099,7 +3099,7 @@ class Eynollah:
|
|
|
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
|
|
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
|
|
|
order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
|
|
|
|
order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con])
|
|
|
|
contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
|
|
|
|
order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
|
|
|
|
order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
|
|
|
|
pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables)
|
|
|
|
pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables)
|
|
|
|
self.logger.info("Job done in %.1fs", time.time() - t0)
|
|
|
|
self.logger.info("Job done in %.1fs", time.time() - t0)
|
|
|
|