mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-09 04:09:54 +02:00
commit
0ea90b7509
6 changed files with 124 additions and 124 deletions
|
@ -1159,7 +1159,7 @@ class Eynollah:
|
|||
processes[i].start()
|
||||
|
||||
slopes = []
|
||||
all_found_texline_polygons = []
|
||||
all_found_textline_polygons = []
|
||||
all_found_text_regions = []
|
||||
all_found_text_regions_par = []
|
||||
boxes = []
|
||||
|
@ -1176,7 +1176,7 @@ class Eynollah:
|
|||
indexes_for_subprocess = list_all_par[6]
|
||||
for j in range(len(slopes_for_sub_process)):
|
||||
slopes.append(slopes_for_sub_process[j])
|
||||
all_found_texline_polygons.append(polys_for_sub_process[j])
|
||||
all_found_textline_polygons.append(polys_for_sub_process[j])
|
||||
boxes.append(boxes_for_sub_process[j])
|
||||
all_found_text_regions.append(contours_for_subprocess[j])
|
||||
all_found_text_regions_par.append(contours_par_for_subprocess[j])
|
||||
|
@ -1186,7 +1186,7 @@ class Eynollah:
|
|||
processes[i].join()
|
||||
self.logger.debug('slopes %s', slopes)
|
||||
self.logger.debug("exit get_slopes_and_deskew_new")
|
||||
return slopes, all_found_texline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con
|
||||
return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con
|
||||
|
||||
def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew):
|
||||
self.logger.debug("enter get_slopes_and_deskew_new")
|
||||
|
@ -1207,7 +1207,7 @@ class Eynollah:
|
|||
processes[i].start()
|
||||
|
||||
slopes = []
|
||||
all_found_texline_polygons = []
|
||||
all_found_textline_polygons = []
|
||||
all_found_text_regions = []
|
||||
all_found_text_regions_par = []
|
||||
boxes = []
|
||||
|
@ -1224,7 +1224,7 @@ class Eynollah:
|
|||
indexes_for_subprocess = list_all_par[6]
|
||||
for j in range(len(slopes_for_sub_process)):
|
||||
slopes.append(slopes_for_sub_process[j])
|
||||
all_found_texline_polygons.append(polys_for_sub_process[j])
|
||||
all_found_textline_polygons.append(polys_for_sub_process[j])
|
||||
boxes.append(boxes_for_sub_process[j])
|
||||
all_found_text_regions.append(contours_for_subprocess[j])
|
||||
all_found_text_regions_par.append(contours_par_for_subprocess[j])
|
||||
|
@ -1234,7 +1234,7 @@ class Eynollah:
|
|||
processes[i].join()
|
||||
self.logger.debug('slopes %s', slopes)
|
||||
self.logger.debug("exit get_slopes_and_deskew_new")
|
||||
return slopes, all_found_texline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con
|
||||
return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con
|
||||
|
||||
def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew):
|
||||
self.logger.debug("enter get_slopes_and_deskew_new_curved")
|
||||
|
@ -1257,7 +1257,7 @@ class Eynollah:
|
|||
processes[i].start()
|
||||
|
||||
slopes = []
|
||||
all_found_texline_polygons = []
|
||||
all_found_textline_polygons = []
|
||||
all_found_text_regions = []
|
||||
all_found_text_regions_par = []
|
||||
boxes = []
|
||||
|
@ -1275,7 +1275,7 @@ class Eynollah:
|
|||
slopes_for_sub_process = list_all_par[6]
|
||||
for j in range(len(polys_for_sub_process)):
|
||||
slopes.append(slopes_for_sub_process[j])
|
||||
all_found_texline_polygons.append(polys_for_sub_process[j][::-1])
|
||||
all_found_textline_polygons.append(polys_for_sub_process[j][::-1])
|
||||
boxes.append(boxes_for_sub_process[j])
|
||||
all_found_text_regions.append(contours_for_subprocess[j])
|
||||
all_found_text_regions_par.append(contours_par_for_subprocess[j])
|
||||
|
@ -1285,7 +1285,7 @@ class Eynollah:
|
|||
for i in range(num_cores):
|
||||
processes[i].join()
|
||||
# print(slopes,'slopes')
|
||||
return all_found_texline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con, slopes
|
||||
return all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con, slopes
|
||||
|
||||
def do_work_of_slopes_new_curved(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_r_con_per_pro, slope_deskew):
|
||||
self.logger.debug("enter do_work_of_slopes_new_curved")
|
||||
|
@ -3007,37 +3007,37 @@ class Eynollah:
|
|||
if not self.curved_line:
|
||||
if self.light_version:
|
||||
if self.textline_light:
|
||||
slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
|
||||
slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
|
||||
slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
|
||||
slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
|
||||
else:
|
||||
slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
|
||||
slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
|
||||
slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
|
||||
slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
|
||||
else:
|
||||
slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
|
||||
slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
|
||||
slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)
|
||||
slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew)
|
||||
|
||||
else:
|
||||
|
||||
scale_param = 1
|
||||
all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew)
|
||||
all_found_texline_polygons = small_textlines_to_parent_adherence2(all_found_texline_polygons, textline_mask_tot_ea, num_col_classifier)
|
||||
all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew)
|
||||
all_found_texline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_texline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
|
||||
all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew)
|
||||
all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
|
||||
all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew)
|
||||
all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
|
||||
|
||||
if self.full_layout:
|
||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||
contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con])
|
||||
if self.light_version:
|
||||
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered)
|
||||
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
|
||||
else:
|
||||
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered)
|
||||
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
|
||||
else:
|
||||
#takes long timee
|
||||
contours_only_text_parent_d_ordered = None
|
||||
if self.light_version:
|
||||
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered)
|
||||
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
|
||||
else:
|
||||
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered)
|
||||
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
|
||||
|
||||
if self.plotter:
|
||||
self.plotter.save_plot_of_layout(text_regions_p, image_page)
|
||||
|
@ -3045,7 +3045,7 @@ class Eynollah:
|
|||
|
||||
pixel_img = 4
|
||||
polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
|
||||
all_found_texline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, kernel=KERNEL, curved_line=self.curved_line)
|
||||
all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line)
|
||||
pixel_lines = 6
|
||||
|
||||
|
||||
|
@ -3091,7 +3091,7 @@ class Eynollah:
|
|||
else:
|
||||
order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
|
||||
|
||||
pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml)
|
||||
pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml)
|
||||
self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
##return pcgts
|
||||
else:
|
||||
|
@ -3101,7 +3101,7 @@ class Eynollah:
|
|||
else:
|
||||
contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con])
|
||||
order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
|
||||
pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_texline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables)
|
||||
pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables)
|
||||
self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
##return pcgts
|
||||
self.writer.write_pagexml(pcgts)
|
||||
|
|
|
@ -796,7 +796,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch):
|
|||
|
||||
return layout_in_patch
|
||||
|
||||
def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered):
|
||||
def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_textline_polygons,slopes,contours_only_text_parent_d_ordered):
|
||||
|
||||
cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent)
|
||||
|
||||
|
@ -805,8 +805,8 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions
|
|||
|
||||
|
||||
|
||||
all_found_texline_polygons_main=[]
|
||||
all_found_texline_polygons_head=[]
|
||||
all_found_textline_polygons_main=[]
|
||||
all_found_textline_polygons_head=[]
|
||||
|
||||
all_box_coord_main=[]
|
||||
all_box_coord_head=[]
|
||||
|
@ -840,7 +840,7 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions
|
|||
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
|
||||
all_box_coord_head.append(all_box_coord[ii])
|
||||
slopes_head.append(slopes[ii])
|
||||
all_found_texline_polygons_head.append(all_found_texline_polygons[ii])
|
||||
all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
|
||||
else:
|
||||
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
|
||||
contours_only_text_parent_main.append(con)
|
||||
|
@ -848,14 +848,14 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions
|
|||
contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
|
||||
all_box_coord_main.append(all_box_coord[ii])
|
||||
slopes_main.append(slopes[ii])
|
||||
all_found_texline_polygons_main.append(all_found_texline_polygons[ii])
|
||||
all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
|
||||
|
||||
#print(all_pixels,pixels_main,pixels_header)
|
||||
|
||||
return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d
|
||||
return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_textline_polygons_main,all_found_textline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d
|
||||
|
||||
|
||||
def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered):
|
||||
def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_textline_polygons,slopes,contours_only_text_parent_d_ordered):
|
||||
|
||||
### to make it faster
|
||||
h_o = regions_model_1.shape[0]
|
||||
|
@ -874,8 +874,8 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r
|
|||
|
||||
|
||||
|
||||
all_found_texline_polygons_main=[]
|
||||
all_found_texline_polygons_head=[]
|
||||
all_found_textline_polygons_main=[]
|
||||
all_found_textline_polygons_head=[]
|
||||
|
||||
all_box_coord_main=[]
|
||||
all_box_coord_head=[]
|
||||
|
@ -909,7 +909,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r
|
|||
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
|
||||
all_box_coord_head.append(all_box_coord[ii])
|
||||
slopes_head.append(slopes[ii])
|
||||
all_found_texline_polygons_head.append(all_found_texline_polygons[ii])
|
||||
all_found_textline_polygons_head.append(all_found_textline_polygons[ii])
|
||||
else:
|
||||
regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1
|
||||
contours_only_text_parent_main.append(con)
|
||||
|
@ -917,7 +917,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r
|
|||
contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii])
|
||||
all_box_coord_main.append(all_box_coord[ii])
|
||||
slopes_main.append(slopes[ii])
|
||||
all_found_texline_polygons_main.append(all_found_texline_polygons[ii])
|
||||
all_found_textline_polygons_main.append(all_found_textline_polygons[ii])
|
||||
|
||||
#print(all_pixels,pixels_main,pixels_header)
|
||||
|
||||
|
@ -931,7 +931,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r
|
|||
contours_only_text_parent_main = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_main]
|
||||
###
|
||||
|
||||
return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d
|
||||
return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_textline_polygons_main,all_found_textline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d
|
||||
|
||||
def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col):
|
||||
# print(textlines_con)
|
||||
|
|
|
@ -7,13 +7,13 @@ class EynollahIdCounter():
|
|||
|
||||
def __init__(self, region_idx=0, line_idx=0):
|
||||
self._counter = Counter()
|
||||
self._inital_region_idx = region_idx
|
||||
self._inital_line_idx = line_idx
|
||||
self._initial_region_idx = region_idx
|
||||
self._initial_line_idx = line_idx
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.set('region', self._inital_region_idx)
|
||||
self.set('line', self._inital_line_idx)
|
||||
self.set('region', self._initial_region_idx)
|
||||
self.set('line', self._initial_line_idx)
|
||||
|
||||
def inc(self, name, val=1):
|
||||
self._counter.update({name: val})
|
||||
|
|
|
@ -13,13 +13,13 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
contours_only_text_parent_h,
|
||||
all_box_coord,
|
||||
all_box_coord_h,
|
||||
all_found_texline_polygons,
|
||||
all_found_texline_polygons_h,
|
||||
all_found_textline_polygons,
|
||||
all_found_textline_polygons_h,
|
||||
kernel=None,
|
||||
curved_line=False,
|
||||
):
|
||||
# print(np.shape(all_found_texline_polygons),np.shape(all_found_texline_polygons[3]),'all_found_texline_polygonsshape')
|
||||
# print(all_found_texline_polygons[3])
|
||||
# print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape')
|
||||
# print(all_found_textline_polygons[3])
|
||||
cx_m, cy_m, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent)
|
||||
cx_h, cy_h, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_h)
|
||||
cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contours(polygons_of_drop_capitals)
|
||||
|
@ -87,9 +87,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
|
||||
|
||||
# print(region_final,'region_final')
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
try:
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -105,9 +105,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
|
||||
# print(arg_min)
|
||||
|
||||
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
|
||||
cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
|
||||
|
||||
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
|
||||
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
|
||||
|
@ -131,7 +131,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
|
||||
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
|
||||
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
|
||||
except:
|
||||
# print('gordun1')
|
||||
|
@ -139,11 +139,11 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
elif len(region_with_intersected_drop) == 1:
|
||||
region_final = region_with_intersected_drop[0] - 1
|
||||
|
||||
# areas_main=np.array([cv2.contourArea(all_found_texline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_texline_polygons[int(region_final)]))])
|
||||
# areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))])
|
||||
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
try:
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -157,9 +157,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
|
||||
# print(arg_min)
|
||||
|
||||
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
|
||||
cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
|
||||
|
||||
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
|
||||
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
|
||||
|
@ -184,15 +184,15 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
# contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
|
||||
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
||||
# print(np.shape(contours_biggest),'contours_biggest')
|
||||
# print(np.shape(all_found_texline_polygons[int(region_final)][arg_min]))
|
||||
# print(np.shape(all_found_textline_polygons[int(region_final)][arg_min]))
|
||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
# print(all_found_texline_polygons[j_cont][0])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
# print(all_found_textline_polygons[j_cont][0])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -206,9 +206,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
|
||||
# print(arg_min)
|
||||
|
||||
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
|
||||
cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 0, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
|
||||
|
||||
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
|
||||
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
|
||||
|
@ -231,15 +231,15 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0]
|
||||
|
||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
# all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
# all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
|
||||
##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
###print(all_box_coord[j_cont])
|
||||
###print(cx_t)
|
||||
###print(cy_t)
|
||||
|
@ -253,9 +253,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
##arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
||||
###print(arg_min)
|
||||
|
||||
##cnt_nearest=np.copy(all_found_texline_polygons[int(region_final)][arg_min])
|
||||
##cnt_nearest[:,0,0]=all_found_texline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
|
||||
##cnt_nearest[:,0,1]=all_found_texline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
|
||||
##cnt_nearest=np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||
##cnt_nearest[:,0,0]=all_found_textline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
|
||||
##cnt_nearest[:,0,1]=all_found_textline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
|
||||
|
||||
##img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||
##img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
||||
|
@ -281,7 +281,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
##contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
||||
|
||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||
##all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||
##all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||
|
||||
else:
|
||||
if len(region_with_intersected_drop) > 1:
|
||||
|
@ -293,9 +293,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
|
||||
|
||||
# print(region_final,'region_final')
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
try:
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -311,9 +311,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
|
||||
# print(arg_min)
|
||||
|
||||
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]
|
||||
cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]
|
||||
|
||||
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
|
||||
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
|
||||
|
@ -337,7 +337,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
|
||||
contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
|
||||
|
||||
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
|
||||
except:
|
||||
# print('gordun1')
|
||||
|
@ -345,14 +345,14 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
elif len(region_with_intersected_drop) == 1:
|
||||
region_final = region_with_intersected_drop[0] - 1
|
||||
|
||||
# areas_main=np.array([cv2.contourArea(all_found_texline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_texline_polygons[int(region_final)]))])
|
||||
# areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))])
|
||||
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
|
||||
# print(cx_t,'print')
|
||||
try:
|
||||
# print(all_found_texline_polygons[j_cont][0])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_texline_polygons[int(region_final)])
|
||||
# print(all_found_textline_polygons[j_cont][0])
|
||||
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||
# print(all_box_coord[j_cont])
|
||||
# print(cx_t)
|
||||
# print(cy_t)
|
||||
|
@ -366,9 +366,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
|
||||
# print(arg_min)
|
||||
|
||||
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]
|
||||
cnt_nearest = np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||
cnt_nearest[:, 0] = all_found_textline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
|
||||
cnt_nearest[:, 1] = all_found_textline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]
|
||||
|
||||
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
|
||||
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
|
||||
|
@ -391,8 +391,8 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0]
|
||||
|
||||
contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
|
||||
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
# all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||
# all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||
|
||||
except:
|
||||
pass
|
||||
|
@ -417,8 +417,8 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
######plt.show()
|
||||
#####try:
|
||||
#####if len(contours_new_parent)==1:
|
||||
######print(all_found_texline_polygons[j_cont][0])
|
||||
#####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_texline_polygons[j_cont])
|
||||
######print(all_found_textline_polygons[j_cont][0])
|
||||
#####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont])
|
||||
######print(all_box_coord[j_cont])
|
||||
######print(cx_t)
|
||||
######print(cy_t)
|
||||
|
@ -431,9 +431,9 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
#####arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
||||
######print(arg_min)
|
||||
|
||||
#####cnt_nearest=np.copy(all_found_texline_polygons[j_cont][arg_min])
|
||||
#####cnt_nearest[:,0]=all_found_texline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
|
||||
#####cnt_nearest[:,1]=all_found_texline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
|
||||
#####cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min])
|
||||
#####cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
|
||||
#####cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
|
||||
|
||||
#####img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||
#####img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
||||
|
@ -454,7 +454,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
#####contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
|
||||
#####contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
|
||||
|
||||
#####all_found_texline_polygons[j_cont][arg_min]=contours_biggest
|
||||
#####all_found_textline_polygons[j_cont][arg_min]=contours_biggest
|
||||
######print(contours_biggest)
|
||||
######plt.imshow(img_textlines[:,:,0])
|
||||
######plt.show()
|
||||
|
@ -462,7 +462,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
#####pass
|
||||
#####except:
|
||||
#####pass
|
||||
return all_found_texline_polygons
|
||||
return all_found_textline_polygons
|
||||
|
||||
def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
|
||||
|
||||
|
|
|
@ -54,54 +54,54 @@ class EynollahXmlWriter():
|
|||
points_page_print = points_page_print + ' '
|
||||
return points_page_print[:-1]
|
||||
|
||||
def serialize_lines_in_marginal(self, marginal_region, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter):
|
||||
for j in range(len(all_found_texline_polygons_marginals[marginal_idx])):
|
||||
def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter):
|
||||
for j in range(len(all_found_textline_polygons_marginals[marginal_idx])):
|
||||
coords = CoordsType()
|
||||
textline = TextLineType(id=counter.next_line_id, Coords=coords)
|
||||
marginal_region.add_TextLine(textline)
|
||||
points_co = ''
|
||||
for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])):
|
||||
for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])):
|
||||
if not (self.curved_line or self.textline_light):
|
||||
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
|
||||
textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
|
||||
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
|
||||
textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
|
||||
else:
|
||||
textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
|
||||
textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
|
||||
textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
|
||||
textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
|
||||
points_co += str(textline_x_coord)
|
||||
points_co += ','
|
||||
points_co += str(textline_y_coord)
|
||||
if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45:
|
||||
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))
|
||||
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y))
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y))
|
||||
else:
|
||||
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x))
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y))
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y))
|
||||
|
||||
elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45:
|
||||
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
|
||||
if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2:
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
|
||||
else:
|
||||
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
|
||||
points_co += ','
|
||||
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
|
||||
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
|
||||
points_co += ' '
|
||||
coords.set_points(points_co[:-1])
|
||||
|
||||
def serialize_lines_in_region(self, text_region, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, counter):
|
||||
def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter):
|
||||
self.logger.debug('enter serialize_lines_in_region')
|
||||
for j in range(len(all_found_texline_polygons[region_idx])):
|
||||
for j in range(len(all_found_textline_polygons[region_idx])):
|
||||
coords = CoordsType()
|
||||
textline = TextLineType(id=counter.next_line_id, Coords=coords)
|
||||
text_region.add_TextLine(textline)
|
||||
region_bboxes = all_box_coord[region_idx]
|
||||
points_co = ''
|
||||
for idx_contour_textline, contour_textline in enumerate(all_found_texline_polygons[region_idx][j]):
|
||||
for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]):
|
||||
if not (self.curved_line or self.textline_light):
|
||||
if len(contour_textline) == 2:
|
||||
textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x))
|
||||
|
@ -140,7 +140,7 @@ class EynollahXmlWriter():
|
|||
with open(out_fname, 'w') as f:
|
||||
f.write(to_xml(pcgts))
|
||||
|
||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables):
|
||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables):
|
||||
self.logger.debug('enter build_pagexml_no_full_layout')
|
||||
|
||||
# create the file structure
|
||||
|
@ -159,13 +159,13 @@ class EynollahXmlWriter():
|
|||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)),
|
||||
)
|
||||
page.add_TextRegion(textregion)
|
||||
self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter)
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter)
|
||||
|
||||
for mm in range(len(found_polygons_marginals)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord)))
|
||||
page.add_TextRegion(marginal)
|
||||
self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
|
||||
|
||||
for mm in range(len(found_polygons_text_region_img)):
|
||||
img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType())
|
||||
|
@ -201,7 +201,7 @@ class EynollahXmlWriter():
|
|||
|
||||
return pcgts
|
||||
|
||||
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml):
|
||||
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml):
|
||||
self.logger.debug('enter build_pagexml_full_layout')
|
||||
|
||||
# create the file structure
|
||||
|
@ -218,20 +218,20 @@ class EynollahXmlWriter():
|
|||
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)))
|
||||
page.add_TextRegion(textregion)
|
||||
self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter)
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter)
|
||||
|
||||
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
|
||||
for mm in range(len(found_polygons_text_region_h)):
|
||||
textregion = TextRegionType(id=counter.next_region_id, type_='header',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord)))
|
||||
page.add_TextRegion(textregion)
|
||||
self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter)
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter)
|
||||
|
||||
for mm in range(len(found_polygons_marginals)):
|
||||
marginal = TextRegionType(id=counter.next_region_id, type_='marginalia',
|
||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord)))
|
||||
page.add_TextRegion(marginal)
|
||||
self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
|
||||
self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
|
||||
|
||||
for mm in range(len(found_polygons_drop_capitals)):
|
||||
page.add_TextRegion(TextRegionType(id=counter.next_region_id, type_='drop-capital',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue