From a2341deab4f3293e48176dc5e1452c6db69e5090 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Wed, 25 Nov 2020 19:13:47 +0100 Subject: [PATCH] :art: remove trailing spaces --- sbb_newspapers_org_image/eynollah.py | 352 +++++++++++++-------------- sbb_newspapers_org_image/utils.py | 204 ++++++++-------- 2 files changed, 278 insertions(+), 278 deletions(-) diff --git a/sbb_newspapers_org_image/eynollah.py b/sbb_newspapers_org_image/eynollah.py index 8069506..bb37555 100644 --- a/sbb_newspapers_org_image/eynollah.py +++ b/sbb_newspapers_org_image/eynollah.py @@ -1238,12 +1238,12 @@ class eynollah: crop_img, crop_coor = crop_image_inside_box(boxes_text[mv], image_page_rotated) # all_box_coord.append(crop_coor) - + mask_textline=np.zeros((textline_mask_tot_ea.shape)) - + mask_textline=cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) - - + + denoised = None all_text_region_raw=(textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] @@ -2758,258 +2758,258 @@ class eynollah: def get_regions_from_xy_2models(self,img,is_image_enhanced): img_org=np.copy(img) - + img_height_h=img_org.shape[0] img_width_h=img_org.shape[1] - + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) - + gaussian_filter=False patches=True binary=False - - - - + + + + ratio_y=1.3 ratio_x=1 - + median_blur=False - + img= self.resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - + if binary: img = self.otsu_copy_binary(img)#self.otsu_copy(img) img = img.astype(np.uint16) - + if median_blur: img=cv2.medianBlur(img,5) if gaussian_filter: img= cv2.GaussianBlur(img,(5,5),0) img = img.astype(np.uint16) prediction_regions_org_y=self.do_prediction(patches,img,model_region) - + prediction_regions_org_y=self.resize_image(prediction_regions_org_y, img_height_h, img_width_h ) - + #plt.imshow(prediction_regions_org_y[:,:,0]) #plt.show() #sys.exit() prediction_regions_org_y=prediction_regions_org_y[:,:,0] - - + + mask_zeros_y=(prediction_regions_org_y[:,:]==0)*1 - - - - - + + + + + if is_image_enhanced: ratio_x=1.2 else: ratio_x=1 - + ratio_y=1 median_blur=False - + img= self.resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - + if binary: img = self.otsu_copy_binary(img)#self.otsu_copy(img) img = img.astype(np.uint16) - + if median_blur: img=cv2.medianBlur(img,5) if gaussian_filter: img= cv2.GaussianBlur(img,(5,5),0) img = img.astype(np.uint16) prediction_regions_org=self.do_prediction(patches,img,model_region) - + prediction_regions_org=self.resize_image(prediction_regions_org, img_height_h, img_width_h ) - + ##plt.imshow(prediction_regions_org[:,:,0]) ##plt.show() ##sys.exit() prediction_regions_org=prediction_regions_org[:,:,0] - + prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 session_region.close() del model_region del session_region gc.collect() ###K.clear_session() - + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) - + gaussian_filter=False patches=True binary=False - - - + + + ratio_x=1 ratio_y=1 median_blur=False - + img= self.resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - + if binary: img = self.otsu_copy_binary(img)#self.otsu_copy(img) img = img.astype(np.uint16) - + if median_blur: img=cv2.medianBlur(img,5) if gaussian_filter: img= cv2.GaussianBlur(img,(5,5),0) img = img.astype(np.uint16) - + marginal_patch=0.2 prediction_regions_org2=self.do_prediction(patches,img,model_region,marginal_patch) - + prediction_regions_org2=self.resize_image(prediction_regions_org2, img_height_h, img_width_h ) - + #plt.imshow(prediction_regions_org2[:,:,0]) #plt.show() #sys.exit() ##prediction_regions_org=prediction_regions_org[:,:,0] - + session_region.close() del model_region del session_region gc.collect() ###K.clear_session() - + mask_zeros2=(prediction_regions_org2[:,:,0]==0)*1 mask_lines2=(prediction_regions_org2[:,:,0]==3)*1 - + text_sume_early=( (prediction_regions_org[:,:]==1)*1 ).sum() - - + + prediction_regions_org_copy=np.copy(prediction_regions_org) - + prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)]=0 - + text_sume_second=( (prediction_regions_org_copy[:,:]==1)*1 ).sum() - + rate_two_models=text_sume_second/float(text_sume_early)*100 - + print(rate_two_models,'ratio_of_two_models') if is_image_enhanced and rate_two_models<95.50:#98.45: pass else: prediction_regions_org=np.copy(prediction_regions_org_copy) - + ##prediction_regions_org[mask_lines2[:,:]==1]=3 prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 - - + + del mask_lines2 del mask_zeros2 del prediction_regions_org2 - + #if is_image_enhanced: #pass #else: #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) - + #gaussian_filter=False #patches=True #binary=False - - - + + + #ratio_x=1 #ratio_y=1 #median_blur=False - + #img= self.resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - + #if binary: #img = self.otsu_copy_binary(img)#self.otsu_copy(img) #img = img.astype(np.uint16) - + #if median_blur: #img=cv2.medianBlur(img,5) #if gaussian_filter: #img= cv2.GaussianBlur(img,(5,5),0) #img = img.astype(np.uint16) #prediction_regions_org2=self.do_prediction(patches,img,model_region) - + #prediction_regions_org2=self.resize_image(prediction_regions_org2, img_height_h, img_width_h ) - + ##plt.imshow(prediction_regions_org2[:,:,0]) ##plt.show() ##sys.exit() ###prediction_regions_org=prediction_regions_org[:,:,0] - + #session_region.close() #del model_region #del session_region #gc.collect() ####K.clear_session() - + #mask_zeros2=(prediction_regions_org2[:,:,0]==0)*1 #mask_lines2=(prediction_regions_org2[:,:,0]==3)*1 - + #text_sume_early=( (prediction_regions_org[:,:]==1)*1 ).sum() - + #prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros2[:,:]==1)]=0 - + ###prediction_regions_org[mask_lines2[:,:]==1]=3 #prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 - + #text_sume_second=( (prediction_regions_org[:,:]==1)*1 ).sum() - + #print(text_sume_second/float(text_sume_early)*100,'twomodelsratio') - + #del mask_lines2 #del mask_zeros2 #del prediction_regions_org2 - + mask_lines_only=(prediction_regions_org[:,:]==3)*1 - + prediction_regions_org = cv2.erode(prediction_regions_org[:,:], self.kernel, iterations=2) - + #plt.imshow(text_region2_1st_channel) #plt.show() - + prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], self.kernel, iterations=2) - + mask_texts_only=(prediction_regions_org[:,:]==1)*1 - + mask_images_only=(prediction_regions_org[:,:]==2)*1 - - - + + + pixel_img=1 min_area_text=0.00001 polygons_of_only_texts=self.return_contours_of_interested_region(mask_texts_only,pixel_img,min_area_text) - + polygons_of_only_images=self.return_contours_of_interested_region(mask_images_only,pixel_img) - + polygons_of_only_lines=self.return_contours_of_interested_region(mask_lines_only,pixel_img,min_area_text) - - + + text_regions_p_true=np.zeros(prediction_regions_org.shape) #text_regions_p_true[:,:]=text_regions_p_1[:,:] - + text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_lines, color=(3,3,3)) - + ##text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_images, color=(2,2,2)) text_regions_p_true[:,:][mask_images_only[:,:]==1]=2 - + text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) - + ##print(np.unique(text_regions_p_true)) - - + + #text_regions_p_true_3d=np.repeat(text_regions_p_1[:, :, np.newaxis], 3, axis=2) #text_regions_p_true_3d=text_regions_p_true_3d.astype(np.uint8) - + del polygons_of_only_texts del polygons_of_only_images del polygons_of_only_lines @@ -3017,14 +3017,14 @@ class eynollah: del prediction_regions_org del img del mask_zeros_y - + del prediction_regions_org_y del img_org gc.collect() - + return text_regions_p_true - - + + def write_images_into_directory(self, img_contoures, dir_of_cropped_imgs, image_page): index = 0 for cont_ind in img_contoures: @@ -3044,13 +3044,13 @@ class eynollah: def get_marginals(self,text_with_lines,text_regions,num_col,slope_deskew): mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1])) mask_marginals=mask_marginals.astype(np.uint8) - - + + text_with_lines=text_with_lines.astype(np.uint8) ##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3) - + text_with_lines_eroded=cv2.erode(text_with_lines,self.kernel,iterations=5) - + if text_with_lines.shape[0]<=1500: pass elif text_with_lines.shape[0]>1500 and text_with_lines.shape[0]<=1800: @@ -3061,46 +3061,46 @@ class eynollah: text_with_lines=self.resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1]) text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=7) text_with_lines=self.resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) - + text_with_lines_y=text_with_lines.sum(axis=0) text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0) - - thickness_along_y_percent=text_with_lines_y_eroded.max()/(float(text_with_lines.shape[0]))*100 - + + thickness_along_y_percent=text_with_lines_y_eroded.max()/(float(text_with_lines.shape[0]))*100 + #print(thickness_along_y_percent,'thickness_along_y_percent') - + if thickness_along_y_percent<30: min_textline_thickness=8 elif thickness_along_y_percent>=30 and thickness_along_y_percent<50: min_textline_thickness=20 else: min_textline_thickness=40 - - - + + + if thickness_along_y_percent>=14: - + text_with_lines_y_rev=-1*text_with_lines_y[:] #print(text_with_lines_y) #print(text_with_lines_y_rev) - - - + + + #plt.plot(text_with_lines_y) #plt.show() - - + + text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev) - + #plt.plot(text_with_lines_y_rev) #plt.show() sigma_gaus=1 region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus) - + region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) - + #plt.plot(region_sum_0_rev) #plt.show() region_sum_0_updown=region_sum_0[len(region_sum_0)::-1] @@ -3110,125 +3110,125 @@ class eynollah: last_nonzero=len(region_sum_0)-last_nonzero - + ##img_sum_0_smooth_rev=-region_sum_0 - - + + mid_point=(last_nonzero+first_nonzero)/2. - - + + one_third_right=(last_nonzero-mid_point)/3.0 one_third_left=(mid_point-first_nonzero)/3.0 - + #img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev) - - - + + + peaks, _ = find_peaks(text_with_lines_y_rev, height=0) - + peaks=np.array(peaks) - - + + #print(region_sum_0[peaks]) ##plt.plot(region_sum_0) ##plt.plot(peaks,region_sum_0[peaks],'*') ##plt.show() #print(first_nonzero,last_nonzero,peaks) peaks=peaks[(peaks>first_nonzero) & ((peaksmid_point] peaks_left=peaks[peaks(mid_point+one_third_right)] peaks_left=peaks[peaks<(mid_point-one_third_left)] - - + + try: point_right=np.min(peaks_right) except: point_right=last_nonzero - - + + try: point_left=np.max(peaks_left) except: point_left=first_nonzero - - - + + + #print(point_left,point_right) #print(text_regions.shape) if point_right>=mask_marginals.shape[1]: point_right=mask_marginals.shape[1]-1 - + try: mask_marginals[:,point_left:point_right]=1 except: mask_marginals[:,:]=1 - + #print(mask_marginals.shape,point_left,point_right,'nadosh') mask_marginals_rotated=self.rotate_image(mask_marginals,-slope_deskew) - + #print(mask_marginals_rotated.shape,'nadosh') mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0) - + mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1 index_x=np.array(range(len(mask_marginals_rotated_sum)))+1 - + index_x_interest=index_x[mask_marginals_rotated_sum==1] - + min_point_of_left_marginal=np.min(index_x_interest)-16 max_point_of_right_marginal=np.max(index_x_interest)+16 - + if min_point_of_left_marginal<0: min_point_of_left_marginal=0 if max_point_of_right_marginal>=text_regions.shape[1]: max_point_of_right_marginal=text_regions.shape[1]-1 - - + + #print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew') #print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated') #plt.imshow(mask_marginals) #plt.show() - + #plt.imshow(mask_marginals_rotated) #plt.show() text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4 - + #plt.imshow(text_regions) #plt.show() - + pixel_img=4 min_area_text=0.00001 polygons_of_marginals=self.return_contours_of_interested_region(text_regions,pixel_img,min_area_text) - + cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=self.find_new_features_of_contoures(polygons_of_marginals) - + text_regions[(text_regions[:,:]==4)]=1 - + marginlas_should_be_main_text=[] - + x_min_marginals_left=[] x_min_marginals_right=[] - + for i in range(len(cx_text_only)): - + x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) #print(x_width_mar,y_height_mar,y_height_mar/x_width_mar,'y_height_mar') @@ -3246,39 +3246,39 @@ class eynollah: x_min_marginals_right.append(x_min_marginals_right_new) else: x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new) - - if len(x_min_marginals_left)==0: + + if len(x_min_marginals_left)==0: x_min_marginals_left=[0] if len(x_min_marginals_right)==0: x_min_marginals_right=[text_regions.shape[1]-1] - - - - - #print(x_min_marginals_left[0],x_min_marginals_right[0],'margo') - + + + + + #print(x_min_marginals_left[0],x_min_marginals_right[0],'margo') + #print(marginlas_should_be_main_text,'marginlas_should_be_main_text') - text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) - + text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) + #print(np.unique(text_regions)) - + #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - + text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 - + ###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4 - + ###text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4 #plt.plot(region_sum_0) #plt.plot(peaks,region_sum_0[peaks],'*') #plt.show() - - + + #plt.imshow(text_regions) #plt.show() - + #sys.exit() else: pass diff --git a/sbb_newspapers_org_image/utils.py b/sbb_newspapers_org_image/utils.py index c42d0d4..acdb398 100644 --- a/sbb_newspapers_org_image/utils.py +++ b/sbb_newspapers_org_image/utils.py @@ -1526,12 +1526,12 @@ def find_num_col_deskew(regions_without_seperators, sigma_, multiplier=3.8): ###peaks, _ = find_peaks(z, height=0) ###peaks_neg=peaks_neg-10-10 - + ####print(np.std(z),'np.std(z)np.std(z)np.std(z)') - + #####plt.plot(z) #####plt.show() - + #####plt.imshow(regions_without_seperators) #####plt.show() ###""" @@ -1539,18 +1539,18 @@ def find_num_col_deskew(regions_without_seperators, sigma_, multiplier=3.8): ###first_nonzero=first_nonzero+0#+100 ###peaks_neg=peaks_neg[(peaks_neg>first_nonzero) & (peaks_neg.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])] ###""" ###interest_pos=z[peaks] - + ###interest_pos=interest_pos[interest_pos>10] - + ###interest_neg=z[peaks_neg] - + ###min_peaks_pos=np.mean(interest_pos) ###min_peaks_neg=0#np.min(interest_neg) - + ###dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier ####print(interest_pos) ###grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 @@ -1558,18 +1558,18 @@ def find_num_col_deskew(regions_without_seperators, sigma_, multiplier=3.8): ###interest_neg_fin=interest_neg[(interest_negimg_patch_org.shape[0]: - + #plt.imshow(img_resized) #plt.show() angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - + #res=[] #num_of_peaks=[] #index_cor=[] var_res=[] - + #indexer=0 for rot in angels: img_rot=self.rotate_image(img_resized,rot) @@ -2414,8 +2414,8 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non #plt.show() img_rot[img_rot!=0]=1 #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - - + + #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) #print(var_spectrum,'var_spectrum') try: @@ -2426,7 +2426,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non #res_me=1000000000000000000000 #else: #pass - + #res_num=len(neg_peaks) except: #res_me=1000000000000000000000 @@ -2440,7 +2440,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non #num_of_peaks.append( res_num ) #index_cor.append(indexer) #indexer=indexer+1 - + var_res.append(var_spectrum) #index_cor.append(indexer) #indexer=indexer+1 @@ -2448,19 +2448,19 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non try: var_res=np.array(var_res) - + ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] except: ang_int=0 - - + + angels=np.linspace(ang_int-22.5,ang_int+22.5,100) #res=[] #num_of_peaks=[] #index_cor=[] var_res=[] - + for rot in angels: img_rot=self.rotate_image(img_resized,rot) @@ -2476,33 +2476,33 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non var_res.append(var_spectrum) - + try: var_res=np.array(var_res) - + ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] except: ang_int=0 - + elif main_page and img_patch_org.shape[1]<=img_patch_org.shape[0]: - + #plt.imshow(img_resized) #plt.show() angels=np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - + var_res=[] - + for rot in angels: img_rot=self.rotate_image(img_resized,rot) #plt.imshow(img_rot) #plt.show() img_rot[img_rot!=0]=1 #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - - + + #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) #print(var_spectrum,'var_spectrum') try: @@ -2513,7 +2513,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non var_res.append(var_spectrum) - + if self.dir_of_all is not None: #print('galdi?') plt.figure(figsize=(60,30)) @@ -2521,7 +2521,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4) plt.xlabel('angle',fontsize=50) plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50) - + plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$') plt.legend(loc='best') plt.savefig(os.path.join(self.dir_of_all,self.f_name+'_rotation_angle.png')) @@ -2529,19 +2529,19 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non try: var_res=np.array(var_res) - + ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] except: ang_int=0 - - + + early_slope_edge=11 if abs(ang_int)>early_slope_edge and ang_int<0: - + angels=np.linspace(-90,-12,100) var_res=[] - + for rot in angels: img_rot=self.rotate_image(img_resized,rot) ##plt.imshow(img_rot) @@ -2558,18 +2558,18 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non try: var_res=np.array(var_res) - + ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] except: ang_int=0 - + elif abs(ang_int)>early_slope_edge and ang_int>0: - + angels=np.linspace(90,12,100) var_res=[] - + for rot in angels: img_rot=self.rotate_image(img_resized,rot) ##plt.imshow(img_rot) @@ -2587,17 +2587,17 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non try: var_res=np.array(var_res) - + ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] except: ang_int=0 else: - + angels=np.linspace(-25,25,60) var_res=[] - + indexer=0 for rot in angels: img_rot=self.rotate_image(img_resized,rot) @@ -2605,39 +2605,39 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non #plt.show() img_rot[img_rot!=0]=1 #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - - + + #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) #print(var_spectrum,'var_spectrum') try: var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) except: var_spectrum=0 - + var_res.append(var_spectrum) try: var_res=np.array(var_res) - + ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] except: ang_int=0 - + #plt.plot(var_res) #plt.show() - + ##plt.plot(mom3_res) ##plt.show() #print(ang_int,'ang_int111') early_slope_edge=22 if abs(ang_int)>early_slope_edge and ang_int<0: - + angels=np.linspace(-90,-25,60) var_res=[] - + for rot in angels: img_rot=self.rotate_image(img_resized,rot) ##plt.imshow(img_rot) @@ -2649,24 +2649,24 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non except: var_spectrum=0 - + var_res.append(var_spectrum) try: var_res=np.array(var_res) - + ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] except: ang_int=0 - + elif abs(ang_int)>early_slope_edge and ang_int>0: - + angels=np.linspace(90,25,60) var_res=[] - + indexer=0 for rot in angels: img_rot=self.rotate_image(img_resized,rot) @@ -2686,11 +2686,11 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non try: var_res=np.array(var_res) - + ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] except: ang_int=0 - + return ang_int @@ -2749,55 +2749,55 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered): #text_only=(regions_model_1[:,:]==1)*1 #contours_only_text,hir_on_text=self.return_contours_of_image(text_only) - + """ contours_only_text_parent=self.return_parent_contours( contours_only_text,hir_on_text) - + areas_cnt_text=np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) areas_cnt_text=areas_cnt_text/float(text_only.shape[0]*text_only.shape[1]) - + ###areas_cnt_text_h=np.array([cv2.contourArea(contours_only_text_parent_h[j]) for j in range(len(contours_only_text_parent_h))]) ###areas_cnt_text_h=areas_cnt_text_h/float(text_only_h.shape[0]*text_only_h.shape[1]) ###contours_only_text_parent=[contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz]>0.0002] contours_only_text_parent=[contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz]>0.00001] """ - + cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=self.find_new_features_of_contoures(contours_only_text_parent) - + length_con=x_max_main-x_min_main height_con=y_max_main-y_min_main - - + + all_found_texline_polygons_main=[] all_found_texline_polygons_head=[] - + all_box_coord_main=[] all_box_coord_head=[] - + slopes_main=[] slopes_head=[] - + contours_only_text_parent_main=[] contours_only_text_parent_head=[] - + contours_only_text_parent_main_d=[] contours_only_text_parent_head_d=[] - + for ii in range(len(contours_only_text_parent)): con=contours_only_text_parent[ii] img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) - - - + + + all_pixels=((img[:,:,0]==255)*1).sum() - + pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() pixels_main=all_pixels-pixels_header - - + + if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 contours_only_text_parent_head.append(con) @@ -2814,11 +2814,11 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) all_found_texline_polygons_main.append(all_found_texline_polygons[ii]) - + #print(all_pixels,pixels_main,pixels_header) - - + + #plt.imshow(img[:,:,0]) #plt.show() return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d