mirror of
				https://github.com/qurator-spk/eynollah.git
				synced 2025-10-31 17:54:12 +01:00 
			
		
		
		
	Merge pull request #102 from qurator-spk/right2left_reading_order
Right2left reading order
This commit is contained in:
		
						commit
						68923e0a5d
					
				
					 3 changed files with 57 additions and 11 deletions
				
			
		|  | @ -97,6 +97,12 @@ from qurator.eynollah.eynollah import Eynollah | |||
|     is_flag=True, | ||||
|     help="if this parameter set to true, this tool will try to detect tables.", | ||||
| ) | ||||
| @click.option( | ||||
|     "--right2left/--left2right", | ||||
|     "-r2l/-l2r", | ||||
|     is_flag=True, | ||||
|     help="if this parameter set to true, this tool will extract right-to-left reading order.", | ||||
| ) | ||||
| @click.option( | ||||
|     "--input_binary/--input-RGB", | ||||
|     "-ib/-irgb", | ||||
|  | @ -149,6 +155,7 @@ def main( | |||
|     textline_light, | ||||
|     full_layout, | ||||
|     tables, | ||||
|     right2left, | ||||
|     input_binary, | ||||
|     allow_scaling, | ||||
|     headers_off, | ||||
|  | @ -184,6 +191,7 @@ def main( | |||
|         textline_light=textline_light, | ||||
|         full_layout=full_layout, | ||||
|         tables=tables, | ||||
|         right2left=right2left, | ||||
|         input_binary=input_binary, | ||||
|         allow_scaling=allow_scaling, | ||||
|         headers_off=headers_off, | ||||
|  |  | |||
|  | @ -158,6 +158,7 @@ class Eynollah: | |||
|         textline_light=False, | ||||
|         full_layout=False, | ||||
|         tables=False, | ||||
|         right2left=False, | ||||
|         input_binary=False, | ||||
|         allow_scaling=False, | ||||
|         headers_off=False, | ||||
|  | @ -189,6 +190,7 @@ class Eynollah: | |||
|         self.textline_light = textline_light | ||||
|         self.full_layout = full_layout | ||||
|         self.tables = tables | ||||
|         self.right2left = right2left | ||||
|         self.input_binary = input_binary | ||||
|         self.allow_scaling = allow_scaling | ||||
|         self.headers_off = headers_off | ||||
|  | @ -2069,6 +2071,7 @@ class Eynollah: | |||
|             arg_text_con = [] | ||||
|             for ii in range(len(cx_text_only)): | ||||
|                 for jj in range(len(boxes)): | ||||
|                     print(cx_text_only[ii],cy_text_only[ii],'markaz') | ||||
|                     if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]:  # this is valid if the center of region identify in which box it is located | ||||
|                         arg_text_con.append(jj) | ||||
|                         break | ||||
|  | @ -2104,6 +2107,9 @@ class Eynollah: | |||
|                 ref_point += len(id_of_texts) | ||||
| 
 | ||||
|             order_of_texts_tot = [] | ||||
|             print(len(contours_only_text_parent),'contours_only_text_parent') | ||||
|             print(len(order_by_con_main),'order_by_con_main') | ||||
|              | ||||
|             for tj1 in range(len(contours_only_text_parent)): | ||||
|                 order_of_texts_tot.append(int(order_by_con_main[tj1])) | ||||
| 
 | ||||
|  | @ -2618,7 +2624,7 @@ class Eynollah: | |||
|                 regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) | ||||
|         t1 = time.time() | ||||
|         if np.abs(slope_deskew) < SLOPE_THRESHOLD: | ||||
|             boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables) | ||||
|             boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) | ||||
|             boxes_d = None | ||||
|             self.logger.debug("len(boxes): %s", len(boxes)) | ||||
|              | ||||
|  | @ -2628,7 +2634,7 @@ class Eynollah: | |||
|             img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) | ||||
|             img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) | ||||
|         else: | ||||
|             boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables) | ||||
|             boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) | ||||
|             boxes = None | ||||
|             self.logger.debug("len(boxes): %s", len(boxes_d)) | ||||
|              | ||||
|  | @ -2713,7 +2719,7 @@ class Eynollah: | |||
|                 pass | ||||
|              | ||||
|             if np.abs(slope_deskew) < SLOPE_THRESHOLD: | ||||
|                 boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables) | ||||
|                 boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) | ||||
|                 text_regions_p_tables = np.copy(text_regions_p) | ||||
|                 text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 | ||||
|                 pixel_line = 3 | ||||
|  | @ -2722,7 +2728,7 @@ class Eynollah: | |||
|                 img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier) | ||||
|                  | ||||
|             else: | ||||
|                 boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables) | ||||
|                 boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) | ||||
|                 text_regions_p_tables = np.copy(text_regions_p_1_n) | ||||
|                 text_regions_p_tables = np.round(text_regions_p_tables) | ||||
|                 text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 | ||||
|  | @ -3065,10 +3071,17 @@ class Eynollah: | |||
|                          | ||||
| 
 | ||||
|                 if np.abs(slope_deskew) < SLOPE_THRESHOLD: | ||||
|                     boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables) | ||||
|                     boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) | ||||
|                 else: | ||||
|                     boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables) | ||||
|              | ||||
|                     boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)      | ||||
| 
 | ||||
|             #print(boxes_d,'boxes_d') | ||||
|             #img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) | ||||
|             #for box_i in boxes_d: | ||||
|                 #img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1 | ||||
|             #plt.imshow(img_once) | ||||
|             #plt.show() | ||||
|             #print(np.unique(img_once),'img_once') | ||||
|             if self.plotter: | ||||
|                 self.plotter.write_images_into_directory(polygons_of_images, image_page) | ||||
|             t_order = time.time() | ||||
|  |  | |||
|  | @ -1672,7 +1672,9 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, | |||
|     return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n | ||||
|          | ||||
| 
 | ||||
| def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, tables): | ||||
| def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, tables, right2left_readingorder): | ||||
|     if right2left_readingorder: | ||||
|         regions_without_separators = cv2.flip(regions_without_separators,1) | ||||
|     boxes=[] | ||||
|     peaks_neg_tot_tables = [] | ||||
| 
 | ||||
|  | @ -1763,6 +1765,13 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho | |||
|             cy_hor_diff=matrix_new[:,7][ (matrix_new[:,9]==0) ] | ||||
|             arg_org_hor_some=matrix_new[:,0][ (matrix_new[:,9]==0) ] | ||||
|              | ||||
|             if right2left_readingorder: | ||||
|                 x_max_hor_some_new = regions_without_separators.shape[1] - x_min_hor_some | ||||
|                 x_min_hor_some_new = regions_without_separators.shape[1] - x_max_hor_some | ||||
|                  | ||||
|                 x_min_hor_some =list(np.copy(x_min_hor_some_new)) | ||||
|                 x_max_hor_some =list(np.copy(x_max_hor_some_new)) | ||||
|              | ||||
|              | ||||
|              | ||||
|              | ||||
|  | @ -1774,7 +1783,6 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho | |||
|             reading_order_type,x_starting,x_ending,y_type_2,y_diff_type_2,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y=return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peaks_neg_tot,cy_hor_diff) | ||||
|              | ||||
| 
 | ||||
|              | ||||
|             if (reading_order_type==1) or (reading_order_type==0 and (len(y_lines_without_mother)>=2 or there_is_sep_with_child==1)): | ||||
| 
 | ||||
|                  | ||||
|  | @ -2027,6 +2035,7 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho | |||
|                          | ||||
|                         columns_not_covered_child_no_mother=np.sort(columns_not_covered_child_no_mother) | ||||
|                          | ||||
|                          | ||||
| 
 | ||||
|                         ind_args=np.array(range(len(y_type_2))) | ||||
|                          | ||||
|  | @ -2281,7 +2290,6 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho | |||
|                      | ||||
|                 ind_args=np.array(range(len(y_type_2))) | ||||
|                 #ind_args=np.array(ind_args) | ||||
|                 #print(ind_args,'ind_args') | ||||
|                 for column in range(len(peaks_neg_tot)-1): | ||||
|                     #print(column,'column') | ||||
|                     ind_args_in_col=ind_args[x_starting==column] | ||||
|  | @ -2337,4 +2345,21 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho | |||
|                      | ||||
|         #else: | ||||
|             #boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]]) | ||||
|     return boxes, peaks_neg_tot_tables | ||||
|              | ||||
|     if right2left_readingorder:         | ||||
|         peaks_neg_tot_tables_new = [] | ||||
|         if len(peaks_neg_tot_tables)>=1: | ||||
|             for peaks_tab_ind in peaks_neg_tot_tables: | ||||
|                 peaks_neg_tot_tables_ind = regions_without_separators.shape[1] - np.array(peaks_tab_ind) | ||||
|                 peaks_neg_tot_tables_ind = list(peaks_neg_tot_tables_ind[::-1]) | ||||
|                 peaks_neg_tot_tables_new.append(peaks_neg_tot_tables_ind) | ||||
|                  | ||||
|          | ||||
|         for i in range(len(boxes)): | ||||
|             x_start_new = regions_without_separators.shape[1] - boxes[i][1] | ||||
|             x_end_new = regions_without_separators.shape[1] - boxes[i][0] | ||||
|             boxes[i][0] = x_start_new | ||||
|             boxes[i][1] = x_end_new | ||||
|         return boxes, peaks_neg_tot_tables_new | ||||
|     else: | ||||
|         return boxes, peaks_neg_tot_tables | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue