avoid indentation (skip_layout_and_reading_order)

pull/142/head
Robert Sachunsky 3 weeks ago
parent 5b82320707
commit cd4e426977

@ -4934,7 +4934,42 @@ class Eynollah:
self.logger.info("Enhancing took %.1fs ", time.time() - t0) self.logger.info("Enhancing took %.1fs ", time.time() - t0)
#print("text region early -1 in %.1fs", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0)
t1 = time.time() t1 = time.time()
if not self.skip_layout_and_reading_order: if self.skip_layout_and_reading_order:
_ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order)
page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
all_found_textline_polygons=[ all_found_textline_polygons ]
all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
order_text_new = [0]
slopes =[0]
id_of_texts_tot =['region_0001']
polygons_of_images = []
slopes_marginals = []
polygons_of_marginals = []
all_found_textline_polygons_marginals = []
all_box_coord_marginals = []
polygons_lines_xml = []
contours_tables = []
ocr_all_textlines = None
pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
if self.dir_in:
continue
else:
return pcgts
if self.light_version: if self.light_version:
text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
#print("text region early -2 in %.1fs", time.time() - t0) #print("text region early -2 in %.1fs", time.time() - t0)
@ -5379,44 +5414,11 @@ class Eynollah:
if not self.dir_in: if not self.dir_in:
return pcgts return pcgts
#print("text region early 7 in %.1fs", time.time() - t0) #print("text region early 7 in %.1fs", time.time() - t0)
else:
_ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order)
page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
all_found_textline_polygons=[ all_found_textline_polygons ]
all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons)
all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
order_text_new = [0]
slopes =[0]
id_of_texts_tot =['region_0001']
polygons_of_images = []
slopes_marginals = []
polygons_of_marginals = []
all_found_textline_polygons_marginals = []
all_box_coord_marginals = []
polygons_lines_xml = []
contours_tables = []
ocr_all_textlines = None
pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
if not self.dir_in:
return pcgts
if self.dir_in: if self.dir_in:
self.writer.write_pagexml(pcgts) self.writer.write_pagexml(pcgts)
#self.logger.info("Job done in %.1fs", time.time() - t0) #self.logger.info("Job done in %.1fs", time.time() - t0)
print("Job done in %.1fs" % time.time() - t0) print("Job done in %.1fs" % (time.time() - t0))
if self.dir_in: if self.dir_in:
self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)

Loading…
Cancel
Save