|
|
|
@ -4934,7 +4934,42 @@ class Eynollah:
|
|
|
|
|
self.logger.info("Enhancing took %.1fs ", time.time() - t0)
|
|
|
|
|
#print("text region early -1 in %.1fs", time.time() - t0)
|
|
|
|
|
t1 = time.time()
|
|
|
|
|
if not self.skip_layout_and_reading_order:
|
|
|
|
|
if self.skip_layout_and_reading_order:
|
|
|
|
|
_ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order)
|
|
|
|
|
|
|
|
|
|
page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
|
|
|
|
|
|
|
|
|
|
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
|
|
|
|
|
all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
|
|
|
|
|
|
|
|
|
|
all_found_textline_polygons=[ all_found_textline_polygons ]
|
|
|
|
|
|
|
|
|
|
all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
|
|
|
|
|
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
order_text_new = [0]
|
|
|
|
|
slopes =[0]
|
|
|
|
|
id_of_texts_tot =['region_0001']
|
|
|
|
|
|
|
|
|
|
polygons_of_images = []
|
|
|
|
|
slopes_marginals = []
|
|
|
|
|
polygons_of_marginals = []
|
|
|
|
|
all_found_textline_polygons_marginals = []
|
|
|
|
|
all_box_coord_marginals = []
|
|
|
|
|
polygons_lines_xml = []
|
|
|
|
|
contours_tables = []
|
|
|
|
|
ocr_all_textlines = None
|
|
|
|
|
|
|
|
|
|
pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
|
|
|
|
|
if self.dir_in:
|
|
|
|
|
continue
|
|
|
|
|
else:
|
|
|
|
|
return pcgts
|
|
|
|
|
|
|
|
|
|
if self.light_version:
|
|
|
|
|
text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
|
|
|
|
|
#print("text region early -2 in %.1fs", time.time() - t0)
|
|
|
|
@ -5379,44 +5414,11 @@ class Eynollah:
|
|
|
|
|
if not self.dir_in:
|
|
|
|
|
return pcgts
|
|
|
|
|
#print("text region early 7 in %.1fs", time.time() - t0)
|
|
|
|
|
else:
|
|
|
|
|
_ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order)
|
|
|
|
|
|
|
|
|
|
page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
|
|
|
|
|
|
|
|
|
|
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
|
|
|
|
|
all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
|
|
|
|
|
|
|
|
|
|
all_found_textline_polygons=[ all_found_textline_polygons ]
|
|
|
|
|
|
|
|
|
|
all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
|
|
|
|
|
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
order_text_new = [0]
|
|
|
|
|
slopes =[0]
|
|
|
|
|
id_of_texts_tot =['region_0001']
|
|
|
|
|
|
|
|
|
|
polygons_of_images = []
|
|
|
|
|
slopes_marginals = []
|
|
|
|
|
polygons_of_marginals = []
|
|
|
|
|
all_found_textline_polygons_marginals = []
|
|
|
|
|
all_box_coord_marginals = []
|
|
|
|
|
polygons_lines_xml = []
|
|
|
|
|
contours_tables = []
|
|
|
|
|
ocr_all_textlines = None
|
|
|
|
|
|
|
|
|
|
pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
|
|
|
|
|
if not self.dir_in:
|
|
|
|
|
return pcgts
|
|
|
|
|
|
|
|
|
|
if self.dir_in:
|
|
|
|
|
self.writer.write_pagexml(pcgts)
|
|
|
|
|
#self.logger.info("Job done in %.1fs", time.time() - t0)
|
|
|
|
|
print("Job done in %.1fs" % time.time() - t0)
|
|
|
|
|
print("Job done in %.1fs" % (time.time() - t0))
|
|
|
|
|
|
|
|
|
|
if self.dir_in:
|
|
|
|
|
self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
|
|
|
|
|