mirror of
				https://github.com/qurator-spk/eynollah.git
				synced 2025-11-03 19:24:13 +01:00 
			
		
		
		
	updating light version
This commit is contained in:
		
							parent
							
								
									543ed4bc38
								
							
						
					
					
						commit
						1da4b7f589
					
				
					 1 changed files with 35 additions and 6 deletions
				
			
		| 
						 | 
					@ -252,7 +252,7 @@ class Eynollah:
 | 
				
			||||||
        self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
 | 
					        self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
 | 
				
			||||||
        self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
 | 
					        self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
 | 
				
			||||||
        self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
 | 
					        self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
 | 
				
			||||||
        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlyla_12_0_2_con_18_22"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
 | 
					        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
 | 
				
			||||||
        ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
 | 
					        ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
 | 
				
			||||||
        self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans"
 | 
					        self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans"
 | 
				
			||||||
        if self.textline_light:
 | 
					        if self.textline_light:
 | 
				
			||||||
| 
						 | 
					@ -2189,7 +2189,7 @@ class Eynollah:
 | 
				
			||||||
        #print(num_col_classifier,'num_col_classifier')
 | 
					        #print(num_col_classifier,'num_col_classifier')
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        if num_col_classifier == 1:
 | 
					        if num_col_classifier == 1:
 | 
				
			||||||
            img_w_new = 800
 | 
					            img_w_new = 1000
 | 
				
			||||||
            img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
 | 
					            img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new)
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
        elif num_col_classifier == 2:
 | 
					        elif num_col_classifier == 2:
 | 
				
			||||||
| 
						 | 
					@ -2299,9 +2299,9 @@ class Eynollah:
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            mask_texts_only = mask_texts_only.astype('uint8')
 | 
					            mask_texts_only = mask_texts_only.astype('uint8')
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            #if num_col_classifier == 1 or num_col_classifier == 2:
 | 
					            ##if num_col_classifier == 1 or num_col_classifier == 2:
 | 
				
			||||||
                #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1)
 | 
					                ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1)
 | 
				
			||||||
                #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1)
 | 
					                ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1)
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
 | 
					            mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
| 
						 | 
					@ -4153,7 +4153,7 @@ class Eynollah:
 | 
				
			||||||
            if dilation_m1<6:
 | 
					            if dilation_m1<6:
 | 
				
			||||||
                dilation_m1 = 6
 | 
					                dilation_m1 = 6
 | 
				
			||||||
            #print(dilation_m1, 'dilation_m1')
 | 
					            #print(dilation_m1, 'dilation_m1')
 | 
				
			||||||
            dilation_m1 = 5
 | 
					            dilation_m1 = 6
 | 
				
			||||||
            dilation_m2 = int(dilation_m1/2.) +1 
 | 
					            dilation_m2 = int(dilation_m1/2.) +1 
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            for i in range(len(x_differential)):
 | 
					            for i in range(len(x_differential)):
 | 
				
			||||||
| 
						 | 
					@ -4657,6 +4657,31 @@ class Eynollah:
 | 
				
			||||||
                all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0]
 | 
					                all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0]
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
        return all_found_textline_polygons
 | 
					        return all_found_textline_polygons
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    def delete_regions_without_textlines(
					        self,
					        slopes,
					        all_found_textline_polygons,
					        boxes_text,
					        txt_con_org,
					        contours_only_text_parent,
					        index_by_text_par_con,
					    ):
					        """
					        Drop every region that has no textlines and re-rank the surviving order keys.

					        All six arguments are treated as parallel sequences: entry ``i`` of each one
					        is read together, and a region is kept only when
					        ``len(all_found_textline_polygons[i])`` is non-zero.

					        Parameters
					        ----------
					        slopes, boxes_text, txt_con_org, contours_only_text_parent
					            Per-region values; copied through unchanged for the kept regions.
					        all_found_textline_polygons
					            Per-region collection of textlines; its length decides whether the
					            region is kept.
					        index_by_text_par_con
					            Per-region sortable key (presumably an ordering index produced by the
					            caller -- TODO confirm).

					        Returns
					        -------
					        tuple
					            ``(slopes, all_found_textline_polygons, boxes_text, txt_con_org,
					            contours_only_text_parent, ranks)`` restricted to the kept regions, in
					            their original relative order. ``ranks`` is a list in which entry ``j``
					            is the position of the ``j``-th kept key in the ascending ``np.argsort``
					            order of the kept keys, so the values are a permutation of ``0..n-1``.
					        """
					        slopes_rem = []
					        all_found_textline_polygons_rem = []
					        boxes_text_rem = []
					        txt_con_org_rem = []
					        contours_only_text_parent_rem = []
					        index_by_text_par_con_rem = []

					        for i, ind_con in enumerate(all_found_textline_polygons):
					            # len() instead of truthiness: entries may be array-like, for which
					            # a bare boolean test would be ambiguous.
					            if len(ind_con):
					                all_found_textline_polygons_rem.append(ind_con)
					                slopes_rem.append(slopes[i])
					                boxes_text_rem.append(boxes_text[i])
					                txt_con_org_rem.append(txt_con_org[i])
					                contours_only_text_parent_rem.append(contours_only_text_parent[i])
					                index_by_text_par_con_rem.append(index_by_text_par_con[i])

					        # Invert the argsort permutation in a single pass: if the element at
					        # index_sort[p] is the p-th smallest key, then its rank is p. This
					        # replaces a per-element boolean-mask search that was quadratic in the
					        # number of kept regions.
					        index_sort = np.argsort(index_by_text_par_con_rem)
					        indexes_new = np.arange(len(index_by_text_par_con_rem))
					        ranks = np.empty_like(indexes_new)
					        ranks[index_sort] = indexes_new
					        index_by_text_par_con_rem_sort = list(ranks)

					        return (
					            slopes_rem,
					            all_found_textline_polygons_rem,
					            boxes_text_rem,
					            txt_con_org_rem,
					            contours_only_text_parent_rem,
					            index_by_text_par_con_rem_sort,
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def run(self):
 | 
					    def run(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Get image and scales, then extract the page of scanned image
 | 
					        Get image and scales, then extract the page of scanned image
 | 
				
			||||||
| 
						 | 
					@ -4923,6 +4948,9 @@ class Eynollah:
 | 
				
			||||||
                            slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
 | 
					                            slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew)
 | 
				
			||||||
                            slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
 | 
					                            slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew)
 | 
				
			||||||
                            
 | 
					                            
 | 
				
			||||||
 | 
					                            #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con)
 | 
				
			||||||
 | 
					                            
 | 
				
			||||||
 | 
					                            #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals))))
 | 
				
			||||||
                            #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
 | 
					                            #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons)
 | 
				
			||||||
                            all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
 | 
					                            all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
 | 
				
			||||||
                            all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
 | 
					                            all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
 | 
				
			||||||
| 
						 | 
					@ -5121,6 +5149,7 @@ class Eynollah:
 | 
				
			||||||
                all_found_textline_polygons=[ all_found_textline_polygons ]
 | 
					                all_found_textline_polygons=[ all_found_textline_polygons ]
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
 | 
					                all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons)
 | 
				
			||||||
 | 
					                all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline")
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                
 | 
					                
 | 
				
			||||||
                order_text_new = [0]
 | 
					                order_text_new = [0]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue