mirror of
				https://github.com/qurator-spk/eynollah.git
				synced 2025-10-31 01:34:17 +01:00 
			
		
		
		
	flow from directory
This commit is contained in:
		
							parent
							
								
									cf5ef8f5ae
								
							
						
					
					
						commit
						c606391c31
					
				
					 4 changed files with 767 additions and 437 deletions
				
			
		|  | @ -10,7 +10,6 @@ from qurator.eynollah.eynollah import Eynollah | ||||||
|     "-i", |     "-i", | ||||||
|     help="image filename", |     help="image filename", | ||||||
|     type=click.Path(exists=True, dir_okay=False), |     type=click.Path(exists=True, dir_okay=False), | ||||||
|     required=True, |  | ||||||
| ) | ) | ||||||
| @click.option( | @click.option( | ||||||
|     "--out", |     "--out", | ||||||
|  | @ -19,6 +18,12 @@ from qurator.eynollah.eynollah import Eynollah | ||||||
|     type=click.Path(exists=True, file_okay=False), |     type=click.Path(exists=True, file_okay=False), | ||||||
|     required=True, |     required=True, | ||||||
| ) | ) | ||||||
|  | @click.option( | ||||||
|  |     "--dir_in", | ||||||
|  |     "-di", | ||||||
|  |     help="directory of images", | ||||||
|  |     type=click.Path(exists=True, file_okay=False), | ||||||
|  | ) | ||||||
| @click.option( | @click.option( | ||||||
|     "--model", |     "--model", | ||||||
|     "-m", |     "-m", | ||||||
|  | @ -112,6 +117,7 @@ from qurator.eynollah.eynollah import Eynollah | ||||||
| def main( | def main( | ||||||
|     image, |     image, | ||||||
|     out, |     out, | ||||||
|  |     dir_in, | ||||||
|     model, |     model, | ||||||
|     save_images, |     save_images, | ||||||
|     save_layout, |     save_layout, | ||||||
|  | @ -140,6 +146,7 @@ def main( | ||||||
|     eynollah = Eynollah( |     eynollah = Eynollah( | ||||||
|         image_filename=image, |         image_filename=image, | ||||||
|         dir_out=out, |         dir_out=out, | ||||||
|  |         dir_in=dir_in, | ||||||
|         dir_models=model, |         dir_models=model, | ||||||
|         dir_of_cropped_images=save_images, |         dir_of_cropped_images=save_images, | ||||||
|         dir_of_layout=save_layout, |         dir_of_layout=save_layout, | ||||||
|  | @ -155,8 +162,9 @@ def main( | ||||||
|         headers_off=headers_off, |         headers_off=headers_off, | ||||||
|         light_version=light_version, |         light_version=light_version, | ||||||
|     ) |     ) | ||||||
|     pcgts = eynollah.run() |     eynollah.run() | ||||||
|     eynollah.writer.write_pagexml(pcgts) |     #pcgts = eynollah.run() | ||||||
|  |     ##eynollah.writer.write_pagexml(pcgts) | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     main() |     main() | ||||||
|  |  | ||||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -797,6 +797,76 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): | ||||||
|     return layout_in_patch |     return layout_in_patch | ||||||
| 
 | 
 | ||||||
def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered):
    """Split text regions into "main" and "header" groups.

    For every contour in ``contours_only_text_parent`` the region is painted
    onto a blank canvas and the pixels whose value in channel 0 of
    ``regions_model_full`` equals 2 are counted as header pixels. A region is
    treated as a header when header pixels are at least as numerous as the
    remaining pixels AND its bounding width/height ratio is >= 1.3. For header
    regions, the pixels of ``regions_model_1`` that are 1 inside the contour
    are relabelled to 2 (``regions_model_1`` is written to in place).

    The per-region lists (``all_box_coord``, ``all_found_texline_polygons``,
    ``slopes`` and, when not ``None``, ``contours_only_text_parent_d_ordered``)
    are partitioned the same way, keeping input order.

    Returns an 11-tuple:
    ``(regions_model_1, contours_main, contours_head, boxes_main, boxes_head,
    textlines_main, textlines_head, slopes_main, slopes_head,
    contours_main_d, contours_head_d)``.
    """
    # Only the bounding extents are needed from the contour features.
    _, _, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent)
    widths = x_max_main - x_min_main
    heights = y_max_main - y_min_main

    contours_main, contours_head = [], []
    contours_main_d, contours_head_d = [], []
    boxes_main, boxes_head = [], []
    slopes_main, slopes_head = [], []
    textlines_main, textlines_head = [], []

    canvas_shape = (regions_model_1.shape[0], regions_model_1.shape[1], 3)

    for idx, contour in enumerate(contours_only_text_parent):
        canvas = np.zeros(canvas_shape)
        canvas = cv2.fillPoly(canvas, pts=[contour], color=(255, 255, 255))
        inside = canvas[:, :, 0] == 255

        n_inside = (inside * 1).sum()
        n_header = ((inside & (regions_model_full[:, :, 0] == 2)) * 1).sum()
        n_main = n_inside - n_header

        # The aspect ratio is only evaluated when the pixel vote already
        # favours "header" (short-circuit `and`).
        if (n_header >= n_main) and ((widths[idx] / float(heights[idx])) >= 1.3):
            new_label = 2
            contours_bucket, contours_d_bucket = contours_head, contours_head_d
            boxes_bucket, slopes_bucket, textlines_bucket = boxes_head, slopes_head, textlines_head
        else:
            # Label stays 1, so the write below leaves the values unchanged.
            new_label = 1
            contours_bucket, contours_d_bucket = contours_main, contours_main_d
            boxes_bucket, slopes_bucket, textlines_bucket = boxes_main, slopes_main, textlines_main

        regions_model_1[(regions_model_1 == 1) & inside] = new_label
        contours_bucket.append(contour)
        if contours_only_text_parent_d_ordered is not None:
            contours_d_bucket.append(contours_only_text_parent_d_ordered[idx])
        boxes_bucket.append(all_box_coord[idx])
        slopes_bucket.append(slopes[idx])
        textlines_bucket.append(all_found_texline_polygons[idx])

    return (
        regions_model_1,
        contours_main,
        contours_head,
        boxes_main,
        boxes_head,
        textlines_main,
        textlines_head,
        slopes_main,
        slopes_head,
        contours_main_d,
        contours_head_d,
    )
|  | 
 | ||||||
|  | 
 | ||||||
|  | def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered): | ||||||
|  |      | ||||||
|  |     ### to make it faster | ||||||
|  |     h_o = regions_model_1.shape[0] | ||||||
|  |     w_o = regions_model_1.shape[1] | ||||||
|  |      | ||||||
|  |     regions_model_1 = cv2.resize(regions_model_1, (int(regions_model_1.shape[1]/3.), int(regions_model_1.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) | ||||||
|  |     regions_model_full = cv2.resize(regions_model_full, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) | ||||||
|  |     contours_only_text_parent = [ (i/3.).astype(np.int32) for i in  contours_only_text_parent] | ||||||
|  | 
 | ||||||
|  |     ### | ||||||
|  |      | ||||||
|     cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent) |     cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent) | ||||||
| 
 | 
 | ||||||
|     length_con=x_max_main-x_min_main |     length_con=x_max_main-x_min_main | ||||||
|  | @ -853,8 +923,14 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|         #plt.imshow(img[:,:,0]) |     ### to make it faster | ||||||
|         #plt.show() |      | ||||||
|  |     regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST) | ||||||
|  |     #regions_model_full = cv2.resize(img, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) | ||||||
|  |     contours_only_text_parent_head = [ (i*3.).astype(np.int32) for i in  contours_only_text_parent_head] | ||||||
|  |     contours_only_text_parent_main = [ (i*3.).astype(np.int32) for i in  contours_only_text_parent_main] | ||||||
|  |     ### | ||||||
|  |      | ||||||
|     return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d |     return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d | ||||||
| 
 | 
 | ||||||
| def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col): | def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col): | ||||||
|  |  | ||||||
|  | @ -3,7 +3,8 @@ import numpy as np | ||||||
| from shapely import geometry | from shapely import geometry | ||||||
| 
 | 
 | ||||||
| from .rotate import rotate_image, rotation_image_new | from .rotate import rotate_image, rotation_image_new | ||||||
| 
 | from multiprocessing import Process, Queue, cpu_count | ||||||
|  | from multiprocessing import Pool | ||||||
| def contours_in_same_horizon(cy_main_hor): | def contours_in_same_horizon(cy_main_hor): | ||||||
|     X1 = np.zeros((len(cy_main_hor), len(cy_main_hor))) |     X1 = np.zeros((len(cy_main_hor), len(cy_main_hor))) | ||||||
|     X2 = np.zeros((len(cy_main_hor), len(cy_main_hor))) |     X2 = np.zeros((len(cy_main_hor), len(cy_main_hor))) | ||||||
|  | @ -147,6 +148,96 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): | ||||||
| 
 | 
 | ||||||
|     return contours_imgs |     return contours_imgs | ||||||
| 
 | 
 | ||||||
def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first):
    """Worker body: re-extract a batch of contours after rotating by ``-slope_first``.

    Each contour in ``contours_per_process`` is filled onto a blank canvas of
    ``img.shape``, the canvas is passed through ``rotation_image_new`` with
    ``-slope_first``, and the first contour found in the result is shifted by
    the absolute size difference between the rotated canvas and ``img``.

    Nothing is returned; a single item
    ``[list_of_contours, list_of_indexes]`` is put on ``queue_of_all_params``,
    where the indexes are the matching entries of ``indexes_r_con_per_pro``.
    """
    extracted = []
    handled_indexes = []

    for pos, contour in enumerate(contours_per_process):
        handled_indexes.append(indexes_r_con_per_pro[pos])

        canvas = cv2.fillPoly(np.zeros(img.shape), pts=[contour], color=(1, 1, 1))
        canvas = rotation_image_new(canvas, -slope_first).astype(np.uint8)

        gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, 0)
        found, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # Only the first contour is kept; offset it by the size difference
        # between the rotated canvas and the reference image.
        outline = found[0]
        shift_x = np.abs(canvas.shape[1] - img.shape[1])
        shift_y = np.abs(canvas.shape[0] - img.shape[0])
        outline[:, 0, 0] = outline[:, 0, 0] + shift_x
        outline[:, 0, 1] = outline[:, 0, 1] + shift_y

        extracted.append(outline)

    queue_of_all_params.put([extracted, handled_indexes])
|  | 
 | ||||||
|  | 
 | ||||||
def get_textregion_contours_in_org_image_multi(cnts, img, slope_first):
    """Re-extract all contours in parallel using one process per chunk.

    ``cnts`` is split into contiguous chunks, each handled by
    ``do_work_of_contours_in_image`` in its own ``Process``. Workers report
    ``[contours, indexes]`` through a shared ``Queue``.

    Workers finish in arbitrary order, so each result is written back to the
    slot given by its original index; the returned list is therefore aligned
    with ``cnts`` (``result[k]`` corresponds to ``cnts[k]``).

    Returns an empty list for empty input without starting any process.
    """
    num_contours = len(cnts)
    if num_contours == 0:
        return []

    # Never start more workers than there are contours to process.
    num_workers = min(cpu_count(), num_contours)
    queue_of_all_params = Queue()

    bounds = [int(b) for b in np.linspace(0, num_contours, num_workers + 1)]
    indexes_by_text_con = np.arange(num_contours)

    processes = [
        Process(
            target=do_work_of_contours_in_image,
            args=(queue_of_all_params, cnts[start:stop], indexes_by_text_con[start:stop], img, slope_first),
        )
        for start, stop in zip(bounds[:-1], bounds[1:])
    ]
    for process in processes:
        process.start()

    # Drain the queue before joining: a child that still has data buffered
    # for the queue may not terminate until that data is consumed.
    # NOTE(review): get() blocks without timeout, so a worker that dies
    # before calling put() would hang this loop (unchanged from before).
    cnts_org = [None] * num_contours
    for _ in processes:
        contours_for_sub_process, indexes_for_sub_process = queue_of_all_params.get(True)
        for index, contour in zip(indexes_for_sub_process, contours_for_sub_process):
            cnts_org[index] = contour

    for process in processes:
        process.join()

    return cnts_org
def loop_contour_image(index_l, cnts,img, slope_first):
    """Re-extract the contour ``cnts[index_l]`` after rotating by ``-slope_first``.

    The contour is filled onto a blank canvas of ``img.shape``, the canvas is
    passed through ``rotation_image_new`` with ``-slope_first``, and the first
    contour found in the result is returned, shifted by the absolute size
    difference between the rotated canvas and ``img``.
    """
    canvas = cv2.fillPoly(np.zeros(img.shape), pts=[cnts[index_l]], color=(1, 1, 1))
    canvas = rotation_image_new(canvas, -slope_first).astype(np.uint8)

    gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, 0)
    found, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Keep only the first contour and offset it by the canvas/image size gap.
    outline = found[0]
    shift_x = np.abs(canvas.shape[1] - img.shape[1])
    shift_y = np.abs(canvas.shape[0] - img.shape[0])
    outline[:, 0, 0] = outline[:, 0, 0] + shift_x
    outline[:, 0, 1] = outline[:, 0, 1] + shift_y
    return outline
|  | 
 | ||||||
def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first):
    """Re-extract all contours in parallel with a ``multiprocessing.Pool``.

    Every contour is processed by ``loop_contour_image``; ``Pool.starmap``
    returns results in task order, so ``result[k]`` corresponds to
    ``cnts[k]``.

    Returns an empty list for empty input without creating a pool.
    """
    if len(cnts) == 0:
        return []

    # Ship only the single contour each task needs (wrapped in a one-element
    # list addressed by index 0) rather than the whole ``cnts`` list, which
    # would otherwise be serialised once per task.
    tasks = [(0, [contour], img, slope_first) for contour in cnts]
    with Pool(cpu_count()) as pool:
        return pool.starmap(loop_contour_image, tasks)
|  | 
 | ||||||
| def get_textregion_contours_in_org_image(cnts, img, slope_first): | def get_textregion_contours_in_org_image(cnts, img, slope_first): | ||||||
| 
 | 
 | ||||||
|     cnts_org = [] |     cnts_org = [] | ||||||
|  | @ -175,11 +266,43 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): | ||||||
|         # print(np.shape(cont_int[0])) |         # print(np.shape(cont_int[0])) | ||||||
|         cnts_org.append(cont_int[0]) |         cnts_org.append(cont_int[0]) | ||||||
| 
 | 
 | ||||||
|     # print(cnts_org,'cnts_org') |     return cnts_org | ||||||
|  | 
 | ||||||
def get_textregion_contours_in_org_image_light(cnts, img, slope_first):
    """Re-extract contours like the non-light variant, but at 1/3 resolution.

    ``img`` is resized to a third of its width and height (nearest-neighbour)
    and every contour is divided by 3 and cast to ``int32`` before processing.
    Each contour is filled onto a blank canvas of the reduced size, passed
    through ``rotation_image_new`` with ``-slope_first``, and the first
    contour found is shifted by the absolute size difference between the
    rotated canvas and the reduced image. The result is multiplied by 3
    before being collected.

    Returns the list of rescaled contours, one per input contour.
    """
    reduced_size = (int(img.shape[1] / 3.), int(img.shape[0] / 3.))
    img = cv2.resize(img, reduced_size, interpolation=cv2.INTER_NEAREST)
    reduced_contours = [(contour / 3).astype(np.int32) for contour in cnts]

    cnts_org = []
    for contour in reduced_contours:
        canvas = cv2.fillPoly(np.zeros(img.shape), pts=[contour], color=(1, 1, 1))
        canvas = rotation_image_new(canvas, -slope_first).astype(np.uint8)

        gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, 0)
        found, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # Keep only the first contour and offset it by the canvas/image size gap.
        outline = found[0]
        shift_x = np.abs(canvas.shape[1] - img.shape[1])
        shift_y = np.abs(canvas.shape[0] - img.shape[0])
        outline[:, 0, 0] = outline[:, 0, 0] + shift_x
        outline[:, 0, 1] = outline[:, 0, 1] + shift_y

        # Scale the coordinates back by the same factor of 3.
        cnts_org.append(outline * 3)

    return cnts_org
| 
 | 
 | ||||||
| def return_contours_of_interested_textline(region_pre_p, pixel): | def return_contours_of_interested_textline(region_pre_p, pixel): | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue