mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-15 15:19:55 +02:00
avoid indentation (skip_layout_and_reading_order)
This commit is contained in:
parent
5b82320707
commit
cd4e426977
1 changed file with 449 additions and 447 deletions
|
@@ -4934,7 +4934,42 @@ class Eynollah:
|
|||
self.logger.info("Enhancing took %.1fs ", time.time() - t0)
|
||||
#print("text region early -1 in %.1fs", time.time() - t0)
|
||||
t1 = time.time()
|
||||
if not self.skip_layout_and_reading_order:
|
||||
if self.skip_layout_and_reading_order:
|
||||
_ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order)
|
||||
|
||||
page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
|
||||
|
||||
|
||||
##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
|
||||
|
||||
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
|
||||
all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
|
||||
|
||||
all_found_textline_polygons=[ all_found_textline_polygons ]
|
||||
|
||||
all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
|
||||
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
|
||||
|
||||
|
||||
order_text_new = [0]
|
||||
slopes =[0]
|
||||
id_of_texts_tot =['region_0001']
|
||||
|
||||
polygons_of_images = []
|
||||
slopes_marginals = []
|
||||
polygons_of_marginals = []
|
||||
all_found_textline_polygons_marginals = []
|
||||
all_box_coord_marginals = []
|
||||
polygons_lines_xml = []
|
||||
contours_tables = []
|
||||
ocr_all_textlines = None
|
||||
|
||||
pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
|
||||
if self.dir_in:
|
||||
continue
|
||||
else:
|
||||
return pcgts
|
||||
|
||||
if self.light_version:
|
||||
text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
|
||||
#print("text region early -2 in %.1fs", time.time() - t0)
|
||||
|
@@ -5379,44 +5414,11 @@ class Eynollah:
|
|||
if not self.dir_in:
|
||||
return pcgts
|
||||
#print("text region early 7 in %.1fs", time.time() - t0)
|
||||
else:
|
||||
_ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order)
|
||||
|
||||
page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
|
||||
|
||||
|
||||
##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
|
||||
|
||||
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
|
||||
all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
|
||||
|
||||
all_found_textline_polygons=[ all_found_textline_polygons ]
|
||||
|
||||
all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
|
||||
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
|
||||
|
||||
|
||||
order_text_new = [0]
|
||||
slopes =[0]
|
||||
id_of_texts_tot =['region_0001']
|
||||
|
||||
polygons_of_images = []
|
||||
slopes_marginals = []
|
||||
polygons_of_marginals = []
|
||||
all_found_textline_polygons_marginals = []
|
||||
all_box_coord_marginals = []
|
||||
polygons_lines_xml = []
|
||||
contours_tables = []
|
||||
ocr_all_textlines = None
|
||||
|
||||
pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
|
||||
if not self.dir_in:
|
||||
return pcgts
|
||||
|
||||
if self.dir_in:
|
||||
self.writer.write_pagexml(pcgts)
|
||||
#self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
print("Job done in %.1fs" % time.time() - t0)
|
||||
print("Job done in %.1fs" % (time.time() - t0))
|
||||
|
||||
if self.dir_in:
|
||||
self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue