From 87ef313502bac633d1e6a5b3a699296bf87d9870 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 27 Nov 2020 14:38:59 +0100 Subject: [PATCH] more outsourcing of utils --- sbb_newspapers_org_image/eynollah.py | 629 +----------------- sbb_newspapers_org_image/unused.py | 258 +++++++ sbb_newspapers_org_image/utils/__init__.py | 349 ---------- sbb_newspapers_org_image/utils/marginals.py | 252 +++++++ sbb_newspapers_org_image/utils/resize.py | 4 + .../utils/separate_lines.py | 454 +++++++++++++ 6 files changed, 979 insertions(+), 967 deletions(-) create mode 100644 sbb_newspapers_org_image/utils/marginals.py create mode 100644 sbb_newspapers_org_image/utils/resize.py diff --git a/sbb_newspapers_org_image/eynollah.py b/sbb_newspapers_org_image/eynollah.py index ba304fa..87265ea 100644 --- a/sbb_newspapers_org_image/eynollah.py +++ b/sbb_newspapers_org_image/eynollah.py @@ -53,7 +53,6 @@ from .utils.contour import ( return_contours_of_interested_textline, return_parent_contours, return_contours_of_interested_region_by_size, - textline_contours_postprocessing, ) from .utils.rotate import ( @@ -73,6 +72,9 @@ from .utils.separate_lines import ( seperate_lines_new_inside_teils2, seperate_lines_vertical, seperate_lines_vertical_cont, + textline_contours_postprocessing, + seperate_lines_new2, + return_deskew_slop, ) from .utils.drop_capitals import ( @@ -80,6 +82,10 @@ from .utils.drop_capitals import ( filter_small_drop_capitals_from_no_patch_layout ) +from .utils.marginals import get_marginals + +from .utils.resize import resize_image + from .utils import ( boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, @@ -91,11 +97,9 @@ from .utils import ( isNaN, otsu_copy, otsu_copy_binary, - resize_image, return_hor_spliter_by_index_for_without_verticals, delete_seperator_around, return_regions_without_seperators, - return_deskew_slop, put_drop_out_from_only_drop_model, putt_bb_of_drop_capitals_of_model_in_patches_in_layout, 
check_any_text_region_in_model_one_is_main_or_header, @@ -1177,7 +1181,7 @@ class eynollah: textline_biggest_region = mask_biggest * textline_mask_tot_ea # print(slope_for_all,'slope_for_all') - textline_rotated_seperated = self.seperate_lines_new2(textline_biggest_region[y : y + h, x : x + w], 0, num_col, slope_for_all) + textline_rotated_seperated = seperate_lines_new2(textline_biggest_region[y : y + h, x : x + w], 0, num_col, slope_for_all, self.dir_of_all, self.f_name) # new line added ##print(np.shape(textline_rotated_seperated),np.shape(mask_biggest)) @@ -1398,374 +1402,6 @@ class eynollah: gc.collect() return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] - def seperate_lines_new(self, img_path, thetha, num_col): - - if num_col == 1: - num_patches = int(img_path.shape[1] / 200.0) - else: - num_patches = int(img_path.shape[1] / 100.0) - # num_patches=int(img_path.shape[1]/200.) - if num_patches == 0: - num_patches = 1 - (h, w) = img_path.shape[:2] - center = (w // 2, h // 2) - M = cv2.getRotationMatrix2D(center, -thetha, 1.0) - x_d = M[0, 2] - y_d = M[1, 2] - - thetha = thetha / 180.0 * np.pi - rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]]) - - x_min_cont = 0 - x_max_cont = img_path.shape[1] - y_min_cont = 0 - y_max_cont = img_path.shape[0] - - xv = np.linspace(x_min_cont, x_max_cont, 1000) - - mada_n = img_path.sum(axis=1) - - ##plt.plot(mada_n) - ##plt.show() - first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) - - y = mada_n[:] # [first_nonzero:last_nonzero] - y_help = np.zeros(len(y) + 40) - y_help[20 : len(y) + 20] = y - x = np.array(range(len(y))) - - peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - if len(peaks_real) <= 2 and len(peaks_real) > 1: - sigma_gaus = 10 - else: - sigma_gaus = 6 - - z = gaussian_filter1d(y_help, sigma_gaus) - zneg_rev = -y_help + np.max(y_help) - zneg = np.zeros(len(zneg_rev) + 40) - zneg[20 : len(zneg_rev) + 
20] = zneg_rev - zneg = gaussian_filter1d(zneg, sigma_gaus) - - peaks, _ = find_peaks(z, height=0) - peaks_neg, _ = find_peaks(zneg, height=0) - - for nn in range(len(peaks_neg)): - if peaks_neg[nn] > len(z) - 1: - peaks_neg[nn] = len(z) - 1 - if peaks_neg[nn] < 0: - peaks_neg[nn] = 0 - - diff_peaks = np.abs(np.diff(peaks_neg)) - cut_off = 20 - peaks_neg_true = [] - forest = [] - - for i in range(len(peaks_neg)): - if i == 0: - forest.append(peaks_neg[i]) - if i < (len(peaks_neg) - 1): - if diff_peaks[i] <= cut_off: - forest.append(peaks_neg[i + 1]) - if diff_peaks[i] > cut_off: - # print(forest[np.argmin(z[forest]) ] ) - if not isNaN(forest[np.argmin(z[forest])]): - # print(len(z),forest) - peaks_neg_true.append(forest[np.argmin(z[forest])]) - forest = [] - forest.append(peaks_neg[i + 1]) - if i == (len(peaks_neg) - 1): - # print(print(forest[np.argmin(z[forest]) ] )) - if not isNaN(forest[np.argmin(z[forest])]): - - peaks_neg_true.append(forest[np.argmin(z[forest])]) - - peaks_neg_true = np.array(peaks_neg_true) - - """ - #plt.figure(figsize=(40,40)) - #plt.subplot(1,2,1) - #plt.title('Textline segmentation von Textregion') - #plt.imshow(img_path) - #plt.xlabel('X') - #plt.ylabel('Y') - #plt.subplot(1,2,2) - #plt.title('Dichte entlang X') - #base = pyplot.gca().transData - #rot = transforms.Affine2D().rotate_deg(90) - #plt.plot(zneg,np.array(range(len(zneg)))) - #plt.plot(zneg[peaks_neg_true],peaks_neg_true,'*') - #plt.gca().invert_yaxis() - - #plt.xlabel('Dichte') - #plt.ylabel('Y') - ##plt.plot([0,len(y)], [grenze,grenze]) - #plt.show() - """ - - peaks_neg_true = peaks_neg_true - 20 - 20 - peaks = peaks - 20 - - # dis_up=peaks_neg_true[14]-peaks_neg_true[0] - # dis_down=peaks_neg_true[18]-peaks_neg_true[14] - - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] - - ##plt.imshow(img_patch_ineterst) - ##plt.show() - - length_x = int(img_path.shape[1] / float(num_patches)) - margin = int(0.04 * length_x) - - width_mid 
= length_x - 2 * margin - - nxf = img_path.shape[1] / float(width_mid) - - if nxf > int(nxf): - nxf = int(nxf) + 1 - else: - nxf = int(nxf) - - slopes_tile_wise = [] - for i in range(nxf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + length_x - elif i > 0: - index_x_d = i * width_mid - index_x_u = index_x_d + length_x - - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x - - # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] - - sigma = 2 - try: - slope_xline = return_deskew_slop(img_xline, sigma, dir_of_all=self.dir_of_all, f_name=self.f_name) - except: - slope_xline = 0 - slopes_tile_wise.append(slope_xline) - # print(slope_xline,'xlineeee') - img_line_rotated = rotate_image(img_xline, slope_xline) - img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - - """ - - xline=np.linspace(0,img_path.shape[1],nx) - slopes_tile_wise=[] - - for ui in range( nx-1 ): - img_xline=img_patch_ineterst[:,int(xline[ui]):int(xline[ui+1])] - - - ##plt.imshow(img_xline) - ##plt.show() - - sigma=3 - try: - slope_xline=return_deskew_slop(img_xline,sigma, dir_of_all=self.dir_of_all, f_name=self.f_name) - except: - slope_xline=0 - slopes_tile_wise.append(slope_xline) - print(slope_xline,'xlineeee') - img_line_rotated=rotate_image(img_xline,slope_xline) - - ##plt.imshow(img_line_rotated) - ##plt.show() - """ - - # dis_up=peaks_neg_true[14]-peaks_neg_true[0] - # dis_down=peaks_neg_true[18]-peaks_neg_true[14] - - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] - - img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) - - for i in range(nxf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + length_x - elif i > 0: - index_x_d = i * width_mid - index_x_u = index_x_d + length_x - - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = 
img_path.shape[1] - length_x - - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] - - img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) - img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] - - img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3)))) - - img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] = img_int[:, :] - ##plt.imshow(img_xline) - ##plt.show() - img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i]) - img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - - img_patch_seperated = seperate_lines_new_inside_teils(img_line_rotated, 0) - - ##plt.imshow(img_patch_seperated) - ##plt.show() - img_patch_seperated_returned = rotate_image(img_patch_seperated, -slopes_tile_wise[i]) - img_patch_seperated_returned[:, :][img_patch_seperated_returned[:, :] != 0] = 1 - - img_patch_seperated_returned_true_size = img_patch_seperated_returned[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] - - img_patch_seperated_returned_true_size = img_patch_seperated_returned_true_size[:, margin : length_x - margin] - img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_seperated_returned_true_size - - """ - for ui in range( nx-1 ): - img_xline=img_patch_ineterst[:,int(xline[ui]):int(xline[ui+1])] - - - img_int=np.zeros((img_xline.shape[0],img_xline.shape[1])) - img_int[:,:]=img_xline[:,:]#img_patch_org[:,:,0] - - img_resized=np.zeros((int( img_int.shape[0]*(1.2) ) , int( img_int.shape[1]*(3) ) )) - - img_resized[ int( img_int.shape[0]*(.1)):int( img_int.shape[0]*(.1))+img_int.shape[0] , int( img_int.shape[1]*(1)):int( img_int.shape[1]*(1))+img_int.shape[1] ]=img_int[:,:] - ##plt.imshow(img_xline) - ##plt.show() - 
img_line_rotated=rotate_image(img_resized,slopes_tile_wise[ui]) - - - #img_patch_seperated = seperate_lines_new_inside_teils(img_line_rotated,0) - - img_patch_seperated = seperate_lines_new_inside_teils(img_line_rotated,0) - - img_patch_seperated_returned=rotate_image(img_patch_seperated,-slopes_tile_wise[ui]) - ##plt.imshow(img_patch_seperated) - ##plt.show() - print(img_patch_seperated_returned.shape) - #plt.imshow(img_patch_seperated_returned[ int( img_int.shape[0]*(.1)):int( img_int.shape[0]*(.1))+img_int.shape[0] , int( img_int.shape[1]*(1)):int( img_int.shape[1]*(1))+img_int.shape[1] ]) - #plt.show() - - img_patch_ineterst_revised[:,int(xline[ui]):int(xline[ui+1])]=img_patch_seperated_returned[ int( img_int.shape[0]*(.1)):int( img_int.shape[0]*(.1))+img_int.shape[0] , int( img_int.shape[1]*(1)):int( img_int.shape[1]*(1))+img_int.shape[1] ] - - - """ - - # print(img_patch_ineterst_revised.shape,np.unique(img_patch_ineterst_revised)) - ##plt.imshow(img_patch_ineterst_revised) - ##plt.show() - return img_patch_ineterst_revised - - def seperate_lines_new2(self, img_path, thetha, num_col, slope_region): - - if num_col == 1: - num_patches = int(img_path.shape[1] / 200.0) - else: - num_patches = int(img_path.shape[1] / 140.0) - # num_patches=int(img_path.shape[1]/200.) 
- if num_patches == 0: - num_patches = 1 - - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] - - # plt.imshow(img_patch_ineterst) - # plt.show() - - length_x = int(img_path.shape[1] / float(num_patches)) - # margin = int(0.04 * length_x) just recently this was changed because it break lines into 2 - margin = int(0.04 * length_x) - # print(margin,'margin') - # if margin<=4: - # margin = int(0.08 * length_x) - - # margin=0 - - width_mid = length_x - 2 * margin - - nxf = img_path.shape[1] / float(width_mid) - - if nxf > int(nxf): - nxf = int(nxf) + 1 - else: - nxf = int(nxf) - - slopes_tile_wise = [] - for i in range(nxf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + length_x - elif i > 0: - index_x_d = i * width_mid - index_x_u = index_x_d + length_x - - if index_x_u > img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x - - # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] - - sigma = 2 - try: - slope_xline = return_deskew_slop(img_xline, sigma, dir_of_all=self.dir_of_all, f_name=self.f_name) - except: - slope_xline = 0 - - if abs(slope_region) < 25 and abs(slope_xline) > 25: - slope_xline = [slope_region][0] - # if abs(slope_region)>70 and abs(slope_xline)<25: - # slope_xline=[slope_region][0] - slopes_tile_wise.append(slope_xline) - # print(slope_xline,'xlineeee') - img_line_rotated = rotate_image(img_xline, slope_xline) - img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - - # print(slopes_tile_wise,'slopes_tile_wise') - img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] - - img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) - - for i in range(nxf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + length_x - elif i > 0: - index_x_d = i * width_mid - index_x_u = index_x_d + length_x - - if index_x_u > 
img_path.shape[1]: - index_x_u = img_path.shape[1] - index_x_d = img_path.shape[1] - length_x - - img_xline = img_patch_ineterst[:, index_x_d:index_x_u] - - img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) - img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] - - img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3)))) - - img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] = img_int[:, :] - # plt.imshow(img_xline) - # plt.show() - img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i]) - img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 - - img_patch_seperated = seperate_lines_new_inside_teils2(img_line_rotated, 0) - - img_patch_seperated_returned = rotate_image(img_patch_seperated, -slopes_tile_wise[i]) - img_patch_seperated_returned[:, :][img_patch_seperated_returned[:, :] != 0] = 1 - - img_patch_seperated_returned_true_size = img_patch_seperated_returned[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] - - img_patch_seperated_returned_true_size = img_patch_seperated_returned_true_size[:, margin : length_x - margin] - img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_seperated_returned_true_size - - # plt.imshow(img_patch_ineterst_revised) - # plt.show() - return img_patch_ineterst_revised - - def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): slope_biggest = 0 slopes_sub = [] @@ -2941,7 +2577,7 @@ class eynollah: #ratio_y=1 #median_blur=False - #img= self.resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) + #img= resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) #if binary: #img = 
self.otsu_copy_binary(img)#self.otsu_copy(img) @@ -2954,7 +2590,7 @@ class eynollah: #img = img.astype(np.uint16) #prediction_regions_org2=self.do_prediction(patches,img,model_region) - #prediction_regions_org2=self.resize_image(prediction_regions_org2, img_height_h, img_width_h ) + #prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) ##plt.imshow(prediction_regions_org2[:,:,0]) ##plt.show() @@ -3057,249 +2693,6 @@ class eynollah: cv2.imwrite(path, croped_page) index += 1 - def get_marginals(self,text_with_lines,text_regions,num_col,slope_deskew): - mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1])) - mask_marginals=mask_marginals.astype(np.uint8) - - - text_with_lines=text_with_lines.astype(np.uint8) - ##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3) - - text_with_lines_eroded=cv2.erode(text_with_lines,self.kernel,iterations=5) - - if text_with_lines.shape[0]<=1500: - pass - elif text_with_lines.shape[0]>1500 and text_with_lines.shape[0]<=1800: - text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1]) - text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=5) - text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) - else: - text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1]) - text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=7) - text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) - - - text_with_lines_y=text_with_lines.sum(axis=0) - text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0) - - thickness_along_y_percent=text_with_lines_y_eroded.max()/(float(text_with_lines.shape[0]))*100 - - #print(thickness_along_y_percent,'thickness_along_y_percent') - - if thickness_along_y_percent<30: - min_textline_thickness=8 - elif 
thickness_along_y_percent>=30 and thickness_along_y_percent<50: - min_textline_thickness=20 - else: - min_textline_thickness=40 - - - - if thickness_along_y_percent>=14: - - text_with_lines_y_rev=-1*text_with_lines_y[:] - #print(text_with_lines_y) - #print(text_with_lines_y_rev) - - - - - #plt.plot(text_with_lines_y) - #plt.show() - - - text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev) - - #plt.plot(text_with_lines_y_rev) - #plt.show() - sigma_gaus=1 - region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus) - - region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) - - #plt.plot(region_sum_0_rev) - #plt.show() - region_sum_0_updown=region_sum_0[len(region_sum_0)::-1] - - first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None)) - last_nonzero=(next((i for i, x in enumerate(region_sum_0_updown) if x), None)) - - - last_nonzero=len(region_sum_0)-last_nonzero - - ##img_sum_0_smooth_rev=-region_sum_0 - - - mid_point=(last_nonzero+first_nonzero)/2. 
- - - one_third_right=(last_nonzero-mid_point)/3.0 - one_third_left=(mid_point-first_nonzero)/3.0 - - #img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev) - - - - - peaks, _ = find_peaks(text_with_lines_y_rev, height=0) - - - peaks=np.array(peaks) - - - #print(region_sum_0[peaks]) - ##plt.plot(region_sum_0) - ##plt.plot(peaks,region_sum_0[peaks],'*') - ##plt.show() - #print(first_nonzero,last_nonzero,peaks) - peaks=peaks[(peaks>first_nonzero) & ((peaksmid_point] - peaks_left=peaks[peaks(mid_point+one_third_right)] - peaks_left=peaks[peaks<(mid_point-one_third_left)] - - - try: - point_right=np.min(peaks_right) - except: - point_right=last_nonzero - - - try: - point_left=np.max(peaks_left) - except: - point_left=first_nonzero - - - - - #print(point_left,point_right) - #print(text_regions.shape) - if point_right>=mask_marginals.shape[1]: - point_right=mask_marginals.shape[1]-1 - - try: - mask_marginals[:,point_left:point_right]=1 - except: - mask_marginals[:,:]=1 - - #print(mask_marginals.shape,point_left,point_right,'nadosh') - mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew) - - #print(mask_marginals_rotated.shape,'nadosh') - mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0) - - mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1 - index_x=np.array(range(len(mask_marginals_rotated_sum)))+1 - - index_x_interest=index_x[mask_marginals_rotated_sum==1] - - min_point_of_left_marginal=np.min(index_x_interest)-16 - max_point_of_right_marginal=np.max(index_x_interest)+16 - - if min_point_of_left_marginal<0: - min_point_of_left_marginal=0 - if max_point_of_right_marginal>=text_regions.shape[1]: - max_point_of_right_marginal=text_regions.shape[1]-1 - - - #print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew') - #print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated') - #plt.imshow(mask_marginals) - #plt.show() - - #plt.imshow(mask_marginals_rotated) - #plt.show() - - 
text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4 - - #plt.imshow(text_regions) - #plt.show() - - pixel_img=4 - min_area_text=0.00001 - polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) - - cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contoures(polygons_of_marginals) - - text_regions[(text_regions[:,:]==4)]=1 - - marginlas_should_be_main_text=[] - - x_min_marginals_left=[] - x_min_marginals_right=[] - - for i in range(len(cx_text_only)): - - x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) - y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) - #print(x_width_mar,y_height_mar,y_height_mar/x_width_mar,'y_height_mar') - if x_width_mar>16 and y_height_mar/x_width_mar<18: - marginlas_should_be_main_text.append(polygons_of_marginals[i]) - if x_min_text_only[i]<(mid_point-one_third_left): - x_min_marginals_left_new=x_min_text_only[i] - if len(x_min_marginals_left)==0: - x_min_marginals_left.append(x_min_marginals_left_new) - else: - x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new) - else: - x_min_marginals_right_new=x_min_text_only[i] - if len(x_min_marginals_right)==0: - x_min_marginals_right.append(x_min_marginals_right_new) - else: - x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new) - - if len(x_min_marginals_left)==0: - x_min_marginals_left=[0] - if len(x_min_marginals_right)==0: - x_min_marginals_right=[text_regions.shape[1]-1] - - - - - #print(x_min_marginals_left[0],x_min_marginals_right[0],'margo') - - #print(marginlas_should_be_main_text,'marginlas_should_be_main_text') - text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) - - #print(np.unique(text_regions)) - - #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 - 
#text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - - text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 - text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 - - ###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4 - - ###text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4 - #plt.plot(region_sum_0) - #plt.plot(peaks,region_sum_0[peaks],'*') - #plt.show() - - - #plt.imshow(text_regions) - #plt.show() - - #sys.exit() - else: - pass - return text_regions - def do_order_of_regions(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): if self.full_layout: @@ -3838,7 +3231,7 @@ class eynollah: regions_without_seperators = (text_regions_p[:, :] == 1) * 1 regions_without_seperators = regions_without_seperators.astype(np.uint8) - text_regions_p = self.get_marginals(rotate_image(regions_without_seperators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew) + text_regions_p = get_marginals(rotate_image(regions_without_seperators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, kernel=self.kernel) except: pass diff --git a/sbb_newspapers_org_image/unused.py b/sbb_newspapers_org_image/unused.py index b0dce6c..9c1fcec 100644 --- a/sbb_newspapers_org_image/unused.py +++ b/sbb_newspapers_org_image/unused.py @@ -2769,3 +2769,261 @@ def do_work_of_textline_seperation(self, queue_of_all_params, polygons_per_proce queue_of_all_params.put([index_polygons_per_process_per_process, polygons_per_par_process_per_process, textregions_cnt_tot_per_process, textlines_cnt_tot_per_process]) +def seperate_lines_new(img_path, thetha, num_col, dir_of_all, f_name): + + if num_col == 1: + num_patches = int(img_path.shape[1] / 200.0) + else: + num_patches = int(img_path.shape[1] / 100.0) + # num_patches=int(img_path.shape[1]/200.) 
+ if num_patches == 0: + num_patches = 1 + (h, w) = img_path.shape[:2] + center = (w // 2, h // 2) + M = cv2.getRotationMatrix2D(center, -thetha, 1.0) + x_d = M[0, 2] + y_d = M[1, 2] + + thetha = thetha / 180.0 * np.pi + rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]]) + + x_min_cont = 0 + x_max_cont = img_path.shape[1] + y_min_cont = 0 + y_max_cont = img_path.shape[0] + + xv = np.linspace(x_min_cont, x_max_cont, 1000) + + mada_n = img_path.sum(axis=1) + + ##plt.plot(mada_n) + ##plt.show() + first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) + + y = mada_n[:] # [first_nonzero:last_nonzero] + y_help = np.zeros(len(y) + 40) + y_help[20 : len(y) + 20] = y + x = np.array(range(len(y))) + + peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) + if len(peaks_real) <= 2 and len(peaks_real) > 1: + sigma_gaus = 10 + else: + sigma_gaus = 6 + + z = gaussian_filter1d(y_help, sigma_gaus) + zneg_rev = -y_help + np.max(y_help) + zneg = np.zeros(len(zneg_rev) + 40) + zneg[20 : len(zneg_rev) + 20] = zneg_rev + zneg = gaussian_filter1d(zneg, sigma_gaus) + + peaks, _ = find_peaks(z, height=0) + peaks_neg, _ = find_peaks(zneg, height=0) + + for nn in range(len(peaks_neg)): + if peaks_neg[nn] > len(z) - 1: + peaks_neg[nn] = len(z) - 1 + if peaks_neg[nn] < 0: + peaks_neg[nn] = 0 + + diff_peaks = np.abs(np.diff(peaks_neg)) + cut_off = 20 + peaks_neg_true = [] + forest = [] + + for i in range(len(peaks_neg)): + if i == 0: + forest.append(peaks_neg[i]) + if i < (len(peaks_neg) - 1): + if diff_peaks[i] <= cut_off: + forest.append(peaks_neg[i + 1]) + if diff_peaks[i] > cut_off: + # print(forest[np.argmin(z[forest]) ] ) + if not isNaN(forest[np.argmin(z[forest])]): + # print(len(z),forest) + peaks_neg_true.append(forest[np.argmin(z[forest])]) + forest = [] + forest.append(peaks_neg[i + 1]) + if i == (len(peaks_neg) - 1): + # print(print(forest[np.argmin(z[forest]) ] )) + if not isNaN(forest[np.argmin(z[forest])]): 
+ + peaks_neg_true.append(forest[np.argmin(z[forest])]) + + peaks_neg_true = np.array(peaks_neg_true) + + """ + #plt.figure(figsize=(40,40)) + #plt.subplot(1,2,1) + #plt.title('Textline segmentation von Textregion') + #plt.imshow(img_path) + #plt.xlabel('X') + #plt.ylabel('Y') + #plt.subplot(1,2,2) + #plt.title('Dichte entlang X') + #base = pyplot.gca().transData + #rot = transforms.Affine2D().rotate_deg(90) + #plt.plot(zneg,np.array(range(len(zneg)))) + #plt.plot(zneg[peaks_neg_true],peaks_neg_true,'*') + #plt.gca().invert_yaxis() + + #plt.xlabel('Dichte') + #plt.ylabel('Y') + ##plt.plot([0,len(y)], [grenze,grenze]) + #plt.show() + """ + + peaks_neg_true = peaks_neg_true - 20 - 20 + peaks = peaks - 20 + + # dis_up=peaks_neg_true[14]-peaks_neg_true[0] + # dis_down=peaks_neg_true[18]-peaks_neg_true[14] + + img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[15]+dis_down ,:] + + ##plt.imshow(img_patch_ineterst) + ##plt.show() + + length_x = int(img_path.shape[1] / float(num_patches)) + margin = int(0.04 * length_x) + + width_mid = length_x - 2 * margin + + nxf = img_path.shape[1] / float(width_mid) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + slopes_tile_wise = [] + for i in range(nxf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + length_x + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + length_x + + if index_x_u > img_path.shape[1]: + index_x_u = img_path.shape[1] + index_x_d = img_path.shape[1] - length_x + + # img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + img_xline = img_patch_ineterst[:, index_x_d:index_x_u] + + sigma = 2 + try: + slope_xline = return_deskew_slop(img_xline, sigma, dir_of_all=dir_of_all, f_name=f_name) + except: + slope_xline = 0 + slopes_tile_wise.append(slope_xline) + # print(slope_xline,'xlineeee') + img_line_rotated = rotate_image(img_xline, slope_xline) + img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 + + """ + + 
xline=np.linspace(0,img_path.shape[1],nx) + slopes_tile_wise=[] + + for ui in range( nx-1 ): + img_xline=img_patch_ineterst[:,int(xline[ui]):int(xline[ui+1])] + + + ##plt.imshow(img_xline) + ##plt.show() + + sigma=3 + try: + slope_xline=return_deskew_slop(img_xline,sigma, dir_of_all=self.dir_of_all, f_name=self.f_name) + except: + slope_xline=0 + slopes_tile_wise.append(slope_xline) + print(slope_xline,'xlineeee') + img_line_rotated=rotate_image(img_xline,slope_xline) + + ##plt.imshow(img_line_rotated) + ##plt.show() + """ + + # dis_up=peaks_neg_true[14]-peaks_neg_true[0] + # dis_down=peaks_neg_true[18]-peaks_neg_true[14] + + img_patch_ineterst = img_path[:, :] # [peaks_neg_true[14]-dis_up:peaks_neg_true[14]+dis_down ,:] + + img_patch_ineterst_revised = np.zeros(img_patch_ineterst.shape) + + for i in range(nxf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + length_x + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + length_x + + if index_x_u > img_path.shape[1]: + index_x_u = img_path.shape[1] + index_x_d = img_path.shape[1] - length_x + + img_xline = img_patch_ineterst[:, index_x_d:index_x_u] + + img_int = np.zeros((img_xline.shape[0], img_xline.shape[1])) + img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0] + + img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3)))) + + img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] = img_int[:, :] + ##plt.imshow(img_xline) + ##plt.show() + img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i]) + img_line_rotated[:, :][img_line_rotated[:, :] != 0] = 1 + + img_patch_seperated = seperate_lines_new_inside_teils(img_line_rotated, 0) + + ##plt.imshow(img_patch_seperated) + ##plt.show() + img_patch_seperated_returned = rotate_image(img_patch_seperated, -slopes_tile_wise[i]) + img_patch_seperated_returned[:, 
:][img_patch_seperated_returned[:, :] != 0] = 1 + + img_patch_seperated_returned_true_size = img_patch_seperated_returned[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] + + img_patch_seperated_returned_true_size = img_patch_seperated_returned_true_size[:, margin : length_x - margin] + img_patch_ineterst_revised[:, index_x_d + margin : index_x_u - margin] = img_patch_seperated_returned_true_size + + """ + for ui in range( nx-1 ): + img_xline=img_patch_ineterst[:,int(xline[ui]):int(xline[ui+1])] + + + img_int=np.zeros((img_xline.shape[0],img_xline.shape[1])) + img_int[:,:]=img_xline[:,:]#img_patch_org[:,:,0] + + img_resized=np.zeros((int( img_int.shape[0]*(1.2) ) , int( img_int.shape[1]*(3) ) )) + + img_resized[ int( img_int.shape[0]*(.1)):int( img_int.shape[0]*(.1))+img_int.shape[0] , int( img_int.shape[1]*(1)):int( img_int.shape[1]*(1))+img_int.shape[1] ]=img_int[:,:] + ##plt.imshow(img_xline) + ##plt.show() + img_line_rotated=rotate_image(img_resized,slopes_tile_wise[ui]) + + + #img_patch_seperated = seperate_lines_new_inside_teils(img_line_rotated,0) + + img_patch_seperated = seperate_lines_new_inside_teils(img_line_rotated,0) + + img_patch_seperated_returned=rotate_image(img_patch_seperated,-slopes_tile_wise[ui]) + ##plt.imshow(img_patch_seperated) + ##plt.show() + print(img_patch_seperated_returned.shape) + #plt.imshow(img_patch_seperated_returned[ int( img_int.shape[0]*(.1)):int( img_int.shape[0]*(.1))+img_int.shape[0] , int( img_int.shape[1]*(1)):int( img_int.shape[1]*(1))+img_int.shape[1] ]) + #plt.show() + + img_patch_ineterst_revised[:,int(xline[ui]):int(xline[ui+1])]=img_patch_seperated_returned[ int( img_int.shape[0]*(.1)):int( img_int.shape[0]*(.1))+img_int.shape[0] , int( img_int.shape[1]*(1)):int( img_int.shape[1]*(1))+img_int.shape[1] ] + + + """ + + # print(img_patch_ineterst_revised.shape,np.unique(img_patch_ineterst_revised)) + 
##plt.imshow(img_patch_ineterst_revised) + ##plt.show() + return img_patch_ineterst_revised diff --git a/sbb_newspapers_org_image/utils/__init__.py b/sbb_newspapers_org_image/utils/__init__.py index d195b6e..ee3bac0 100644 --- a/sbb_newspapers_org_image/utils/__init__.py +++ b/sbb_newspapers_org_image/utils/__init__.py @@ -10,9 +10,6 @@ from scipy.ndimage import gaussian_filter1d from .is_nan import isNaN -def resize_image(img_in, input_height, input_width): - return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) - def crop_image_inside_box(box, img_org_copy): image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] @@ -701,352 +698,6 @@ def return_regions_without_seperators(regions_pre): return regions_without_seperators -def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=None, f_name=None): - - - if main_page and dir_of_all is not None: - - - plt.figure(figsize=(70,40)) - plt.rcParams['font.size']='50' - plt.subplot(1,2,1) - plt.imshow(img_patch_org) - plt.subplot(1,2,2) - plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8) - plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60) - plt.ylabel('Height',fontsize=60) - plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))]) - plt.gca().invert_yaxis() - - plt.savefig(os.path.join(dir_of_all, f_name+'_density_of_textline.png')) - #print(np.max(img_patch_org.sum(axis=0)) ,np.max(img_patch_org.sum(axis=1)),'axislar') - - #img_patch_org=resize_image(img_patch_org,int(img_patch_org.shape[0]*2.5),int(img_patch_org.shape[1]/2.5)) - - #print(np.max(img_patch_org.sum(axis=0)) ,np.max(img_patch_org.sum(axis=1)),'axislar2') - - img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) - img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] - - - - max_shape=np.max(img_int.shape) - img_resized=np.zeros((int( 
max_shape*(1.1) ) , int( max_shape*(1.1) ) )) - - - onset_x=int((img_resized.shape[1]-img_int.shape[1])/2.) - onset_y=int((img_resized.shape[0]-img_int.shape[0])/2.) - - - #img_resized=np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) )) - - - - #img_resized[ int( img_int.shape[0]*(.4)):int( img_int.shape[0]*(.4))+img_int.shape[0] , int( img_int.shape[1]*(.8)):int( img_int.shape[1]*(.8))+img_int.shape[1] ]=img_int[:,:] - img_resized[ onset_y:onset_y+img_int.shape[0] , onset_x:onset_x+img_int.shape[1] ]=img_int[:,:] - - #print(img_resized.shape,'img_resizedshape') - #plt.imshow(img_resized) - #plt.show() - - if main_page and img_patch_org.shape[1]>img_patch_org.shape[0]: - - #plt.imshow(img_resized) - #plt.show() - angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - - #res=[] - #num_of_peaks=[] - #index_cor=[] - var_res=[] - - #indexer=0 - for rot in angels: - img_rot=self.rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - - - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - ##print(rot,var_spectrum,'var_spectrum') - #res_me=np.mean(neg_peaks) - #if res_me==0: - #res_me=1000000000000000000000 - #else: - #pass - - #res_num=len(neg_peaks) - except: - #res_me=1000000000000000000000 - #res_num=0 - var_spectrum=0 - #if self.isNaN(res_me): - #pass - #else: - #res.append( res_me ) - #var_res.append(var_spectrum) - #num_of_peaks.append( res_num ) - #index_cor.append(indexer) - #indexer=indexer+1 - - var_res.append(var_spectrum) - #index_cor.append(indexer) - #indexer=indexer+1 - - - try: - var_res=np.array(var_res) - - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - - 
angels=np.linspace(ang_int-22.5,ang_int+22.5,100) - - #res=[] - #num_of_peaks=[] - #index_cor=[] - var_res=[] - - - for rot in angels: - img_rot=self.rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - try: - var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - - except: - var_spectrum=0 - - var_res.append(var_spectrum) - - - - - try: - var_res=np.array(var_res) - - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif main_page and img_patch_org.shape[1]<=img_patch_org.shape[0]: - - #plt.imshow(img_resized) - #plt.show() - angels=np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - - - var_res=[] - - for rot in angels: - img_rot=self.rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - - - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - - except: - var_spectrum=0 - - var_res.append(var_spectrum) - - - if self.dir_of_all is not None: - #print('galdi?') - plt.figure(figsize=(60,30)) - plt.rcParams['font.size']='50' - plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4) - plt.xlabel('angle',fontsize=50) - plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50) - - plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$') - plt.legend(loc='best') - plt.savefig(os.path.join(self.dir_of_all,self.f_name+'_rotation_angle.png')) - - - try: - var_res=np.array(var_res) - - 
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - - early_slope_edge=11 - if abs(ang_int)>early_slope_edge and ang_int<0: - - angels=np.linspace(-90,-12,100) - - var_res=[] - - for rot in angels: - img_rot=self.rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - try: - var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - - var_res.append(var_spectrum) - - - try: - var_res=np.array(var_res) - - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif abs(ang_int)>early_slope_edge and ang_int>0: - - angels=np.linspace(90,12,100) - - - var_res=[] - - for rot in angels: - img_rot=self.rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - try: - var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(indexer,'indexer') - except: - var_spectrum=0 - - var_res.append(var_spectrum) - - - try: - var_res=np.array(var_res) - - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - else: - - - angels=np.linspace(-25,25,60) - - var_res=[] - - indexer=0 - for rot in angels: - img_rot=self.rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - - - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - - var_res.append(var_spectrum) - - - try: - var_res=np.array(var_res) - - 
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - #plt.plot(var_res) - #plt.show() - - ##plt.plot(mom3_res) - ##plt.show() - #print(ang_int,'ang_int111') - - early_slope_edge=22 - if abs(ang_int)>early_slope_edge and ang_int<0: - - angels=np.linspace(-90,-25,60) - - var_res=[] - - for rot in angels: - img_rot=self.rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - try: - var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - - except: - var_spectrum=0 - - var_res.append(var_spectrum) - - - - try: - var_res=np.array(var_res) - - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - elif abs(ang_int)>early_slope_edge and ang_int>0: - - angels=np.linspace(90,25,60) - - var_res=[] - - indexer=0 - for rot in angels: - img_rot=self.rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) - try: - var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(indexer,'indexer') - except: - var_spectrum=0 - - var_res.append(var_spectrum) - - - - try: - var_res=np.array(var_res) - - ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] - except: - ang_int=0 - - return ang_int - - def put_drop_out_from_only_drop_model(layout_no_patch, layout1): drop_only = (layout_no_patch[:, :, 0] == 4) * 1 diff --git a/sbb_newspapers_org_image/utils/marginals.py b/sbb_newspapers_org_image/utils/marginals.py new file mode 100644 index 0000000..08c700a --- /dev/null +++ b/sbb_newspapers_org_image/utils/marginals.py @@ -0,0 +1,252 @@ +import numpy as np +import cv2 +from scipy.signal import find_peaks +from 
# --- sbb_newspapers_org_image/utils/marginals.py (new file in this patch) ---
from scipy.ndimage import gaussian_filter1d

from .contour import find_new_features_of_contoures, return_contours_of_interested_region
from .resize import resize_image
from .rotate import rotate_image


def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None):
    """Relabel text regions lying outside the main text column as marginals.

    A column-wise profile of ``text_with_lines`` is searched for gaps left
    and right of the page centre; everything outside the innermost
    qualifying gaps is treated as a marginal candidate.

    Args:
        text_with_lines: 2-D mask; cast to ``uint8`` before use.
            NOTE(review): presumably the text-line prediction - confirm
            with the caller.
        text_regions: 2-D label image. Pixels equal to 1 are the ones that
            get relabelled (to 4) or cleared (to 0). **Modified in place**
            and also returned.
        num_col: column count of the page. Only 1 and 2 select a gap
            search; any other value falls back to the outermost non-empty
            columns of the profile.
        slope_deskew: angle handed (negated) to ``rotate_image`` to bring
            the column mask back into the frame of ``text_regions``.
        kernel: structuring element forwarded to ``cv2.erode``; ``None``
            lets OpenCV use its default element.

    Returns:
        ``text_regions``. Returned unchanged when the eroded profile is
        thinner than 14 % of the image height or the profile is empty.
    """
    mask_marginals = np.zeros((text_with_lines.shape[0], text_with_lines.shape[1]))
    mask_marginals = mask_marginals.astype(np.uint8)

    text_with_lines = text_with_lines.astype(np.uint8)
    text_with_lines_eroded = cv2.erode(text_with_lines, kernel, iterations=5)

    # Tall inputs are stretched vertically, eroded, and shrunk back to the
    # original size; inputs of at most 1500 rows are used as they are.
    height = text_with_lines.shape[0]
    if height > 1500:
        stretch, erode_iters = (1.5, 5) if height <= 1800 else (1.8, 7)
        stretched = resize_image(text_with_lines, int(height * stretch), text_with_lines.shape[1])
        stretched = cv2.erode(stretched, kernel, iterations=erode_iters)
        text_with_lines = resize_image(stretched, text_with_lines_eroded.shape[0], text_with_lines_eroded.shape[1])

    text_with_lines_y = text_with_lines.sum(axis=0)
    text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0)

    thickness_along_y_percent = text_with_lines_y_eroded.max() / float(text_with_lines.shape[0]) * 100

    if thickness_along_y_percent < 30:
        min_textline_thickness = 8
    elif thickness_along_y_percent < 50:
        min_textline_thickness = 20
    else:
        min_textline_thickness = 40

    if thickness_along_y_percent < 14:
        return text_regions

    # Inverted profile (valleys become peaks). ``max - y`` gives the same
    # values as negating and shifting by the minimum, without negating an
    # unsigned column sum.
    text_with_lines_y_rev = text_with_lines_y.max() - text_with_lines_y

    sigma_gaus = 1
    region_sum_0 = gaussian_filter1d(text_with_lines_y, sigma_gaus)

    nonzero_cols = np.flatnonzero(region_sum_0)
    if nonzero_cols.size == 0:
        # Nothing left in the smoothed profile: no column to delimit.
        return text_regions
    first_nonzero = int(nonzero_cols[0])
    last_nonzero = int(nonzero_cols[-1]) + 1  # exclusive bound, as before

    mid_point = (last_nonzero + first_nonzero) / 2.0
    one_third_right = (last_nonzero - mid_point) / 3.0
    one_third_left = (mid_point - first_nonzero) / 3.0

    peaks, _ = find_peaks(text_with_lines_y_rev, height=0)
    peaks = np.array(peaks)

    # Keep gaps strictly inside the non-empty span whose smoothed profile
    # is thinner than a text line.
    peaks = peaks[(peaks > first_nonzero) & (peaks < last_nonzero)]
    peaks = peaks[region_sum_0[peaks] < min_textline_thickness]

    if num_col == 1:
        peaks_right = peaks[peaks > mid_point]
        peaks_left = peaks[peaks < mid_point]
    elif num_col == 2:
        peaks_right = peaks[peaks > (mid_point + one_third_right)]
        peaks_left = peaks[peaks < (mid_point - one_third_left)]
    else:
        # No rule for other column counts: use the fallbacks below.
        peaks_right = peaks[:0]
        peaks_left = peaks[:0]

    point_right = int(peaks_right.min()) if peaks_right.size else last_nonzero
    point_left = int(peaks_left.max()) if peaks_left.size else first_nonzero

    if point_right >= mask_marginals.shape[1]:
        point_right = mask_marginals.shape[1] - 1

    mask_marginals[:, point_left:point_right] = 1

    mask_marginals_rotated = rotate_image(mask_marginals, -slope_deskew)

    # 1-based indices of the columns the rotated mask touches, widened by
    # 16 px on each side and clamped to the width of text_regions.
    index_x_interest = np.flatnonzero(mask_marginals_rotated.sum(axis=0) != 0) + 1

    min_point_of_left_marginal = np.min(index_x_interest) - 16
    max_point_of_right_marginal = np.max(index_x_interest) + 16

    if min_point_of_left_marginal < 0:
        min_point_of_left_marginal = 0
    if max_point_of_right_marginal >= text_regions.shape[1]:
        max_point_of_right_marginal = text_regions.shape[1] - 1

    # Temporarily tag text outside the column mask with 4 so its contours
    # can be extracted, then restore it to 1.
    text_regions[(mask_marginals_rotated[:, :] != 1) & (text_regions[:, :] == 1)] = 4

    pixel_img = 4
    min_area_text = 0.00001
    polygons_of_marginals = return_contours_of_interested_region(text_regions, pixel_img, min_area_text)

    (_cx, _cy, x_min_text_only, x_max_text_only,
     y_min_text_only, y_max_text_only, _y_cor) = find_new_features_of_contoures(polygons_of_marginals)

    text_regions[(text_regions[:, :] == 4)] = 1

    # Candidates wider than 16 px with height/width below 18 are painted
    # with label 4.
    marginlas_should_be_main_text = []
    for polygon, x_lo, x_hi, y_lo, y_hi in zip(
        polygons_of_marginals, x_min_text_only, x_max_text_only, y_min_text_only, y_max_text_only
    ):
        x_width_mar = abs(x_lo - x_hi)
        y_height_mar = abs(y_lo - y_hi)
        if x_width_mar > 16 and y_height_mar / x_width_mar < 18:
            marginlas_should_be_main_text.append(polygon)

    text_regions = cv2.fillPoly(text_regions, pts=marginlas_should_be_main_text, color=(4, 4))

    # Any remaining label-1 pixels beyond the widened column span are cleared.
    left_cut = int(min_point_of_left_marginal)
    right_cut = int(max_point_of_right_marginal)
    text_regions[:, :left_cut][text_regions[:, :left_cut] == 1] = 0
    text_regions[:, right_cut:][text_regions[:, right_cut:] == 1] = 0

    return text_regions


# --- sbb_newspapers_org_image/utils/resize.py (new file in this patch) ---
import cv2


def resize_image(img_in, input_height, input_width):
    """Resize ``img_in`` to ``input_height`` x ``input_width`` with nearest-neighbour interpolation."""
    return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)


# --- sbb_newspapers_org_image/utils/separate_lines.py: import added by this patch ---
import matplotlib.pyplot as plt
import numpy as np
import cv2
from scipy.signal import find_peaks


def _tile_bounds(tile_idx, width_mid, length_x, img_width):
    """Return the (start, stop) columns of tile ``tile_idx``, clamped to the image width."""
    start = tile_idx * width_mid
    stop = start + length_x
    if stop > img_width:
        # Last tile: keep the full tile width by shifting it left.
        stop = img_width
        start = img_width - length_x
    return start, stop


def seperate_lines_new2(img_path, thetha, num_col, slope_region, dir_of_all, f_name):
    """Separate text lines tile by tile, deskewing every tile individually.

    The image is cut into overlapping vertical tiles. A first pass
    estimates one slope per tile; a second pass rotates each tile by its
    slope, runs ``seperate_lines_new_inside_teils2`` on it, rotates the
    result back and pastes the tile (minus its margins) into the output.

    Args:
        img_path: 2-D array to process (an image, despite the name).
        thetha: never read by this function; kept for caller compatibility.
        num_col: 1 selects tiles of roughly 200 px, anything else 140 px.
        slope_region: slope of the whole region; replaces a tile slope
            whose magnitude exceeds 25 while the region's stays below 25.
        dir_of_all, f_name: forwarded to ``return_deskew_slop``.

    Returns:
        Float array shaped like ``img_path``. The outer ``margin`` columns
        of every tile are never written, so the leftmost and rightmost
        ``margin`` columns of the result stay zero.
    """
    img_width = img_path.shape[1]

    patch_width = 200.0 if num_col == 1 else 140.0
    num_patches = int(img_width / patch_width)
    if num_patches == 0:
        num_patches = 1

    length_x = int(img_width / float(num_patches))
    margin = int(0.04 * length_x)
    width_mid = length_x - 2 * margin

    nxf = int(np.ceil(img_width / float(width_mid)))

    # Pass 1: one slope per tile.
    slopes_tile_wise = []
    for i in range(nxf):
        index_x_d, index_x_u = _tile_bounds(i, width_mid, length_x, img_width)
        img_xline = img_path[:, index_x_d:index_x_u]

        sigma = 2
        try:
            slope_xline = return_deskew_slop(img_xline, sigma, dir_of_all=dir_of_all, f_name=f_name)
        except Exception:
            # Best effort: a tile whose slope cannot be estimated is treated as level.
            slope_xline = 0

        if abs(slope_region) < 25 and abs(slope_xline) > 25:
            slope_xline = slope_region
        slopes_tile_wise.append(slope_xline)

    # Pass 2: deskew, separate, rotate back, paste.
    img_patch_ineterst_revised = np.zeros(img_path.shape)

    for i in range(nxf):
        index_x_d, index_x_u = _tile_bounds(i, width_mid, length_x, img_width)
        img_xline = img_path[:, index_x_d:index_x_u]
        tile_h, tile_w = img_xline.shape[0], img_xline.shape[1]

        img_int = np.zeros((tile_h, tile_w))
        img_int[:, :] = img_xline[:, :]

        # Pad (10 % above/below, one tile width left/right) so the
        # rotation does not push content out of the frame.
        top = int(tile_h * 0.1)
        left = tile_w
        img_resized = np.zeros((int(tile_h * 1.2), int(tile_w * 3)))
        img_resized[top:top + tile_h, left:left + tile_w] = img_int

        img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i])
        img_line_rotated[img_line_rotated != 0] = 1

        img_patch_seperated = seperate_lines_new_inside_teils2(img_line_rotated, 0)

        img_patch_seperated_returned = rotate_image(img_patch_seperated, -slopes_tile_wise[i])
        img_patch_seperated_returned[img_patch_seperated_returned != 0] = 1

        true_size = img_patch_seperated_returned[top:top + tile_h, left:left + tile_w]
        true_size = true_size[:, margin:length_x - margin]
        img_patch_ineterst_revised[:, index_x_d + margin:index_x_u - margin] = true_size

    return img_patch_ineterst_revised


def _best_deskew_angle(img_resized, angles, sigma_des):
    """Score every angle in ``angles`` and return ``(best_angle, scores)``.

    For each angle the image is rotated, binarised and scored with
    ``find_num_col_deskew``; the angle with the highest score wins. A
    failing score counts as 0, and if no maximum can be picked the best
    angle is 0.
    """
    # Bound outside the try so a missing helper fails loudly instead of
    # being scored as 0.
    # NOTE(review): assumes find_num_col_deskew is available at module
    # scope in separate_lines.py - confirm the import exists.
    rotate = rotate_image
    score_fn = find_num_col_deskew

    scores = []
    for rot in angles:
        img_rot = rotate(img_resized, rot)
        img_rot[img_rot != 0] = 1
        try:
            score = score_fn(img_rot, sigma_des, 20.3)
        except Exception:
            score = 0
        scores.append(score)

    try:
        best = angles[np.argmax(np.array(scores))]
    except Exception:
        best = 0
    return best, scores


def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=None, f_name=None):
    """Estimate the deskew angle of a 2-D patch by a search over rotations.

    The patch is centred on a square canvas 1.1 times its longer side.
    Candidate angles are scored by ``find_num_col_deskew`` on the rotated,
    binarised canvas, and the best-scoring angle is returned.

    Search ranges:
      * ``main_page`` and wider than tall: coarse search over
        -45/0/45/90, then 100 steps within +/-22.5 of the coarse winner.
      * ``main_page`` otherwise: 100 steps in [-12, 12]; if the winner's
        magnitude exceeds 11, 100 steps between 90 and 12 on that side.
      * not ``main_page``: 60 steps in [-25, 25]; if the winner's
        magnitude exceeds 22, 60 steps between 90 and 25 on that side.

    Args:
        img_patch_org: 2-D array.
        sigma_des: forwarded unchanged to ``find_num_col_deskew``.
        main_page: selects the search ranges above and enables the plots.
        dir_of_all: when not ``None`` and ``main_page`` is true, diagnostic
            PNGs are written into this directory.
        f_name: filename prefix for those PNGs.

    Returns:
        The selected angle (an element of the searched range), or 0 when
        no maximum could be determined.
    """
    import os  # used for the plot paths only

    if main_page and dir_of_all is not None:
        density = gaussian_filter1d(img_patch_org.sum(axis=1), 3)

        fig = plt.figure(figsize=(70, 40))
        plt.rcParams['font.size'] = '50'
        plt.subplot(1, 2, 1)
        plt.imshow(img_patch_org)
        plt.subplot(1, 2, 2)
        plt.plot(density, np.array(range(len(density))), linewidth=8)
        plt.xlabel('Density of textline prediction in direction of X axis', fontsize=60)
        plt.ylabel('Height', fontsize=60)
        plt.yticks([0, len(density)])
        plt.gca().invert_yaxis()

        plt.savefig(os.path.join(dir_of_all, f_name + '_density_of_textline.png'))
        plt.close(fig)  # do not accumulate open figures across pages

    img_int = np.zeros((img_patch_org.shape[0], img_patch_org.shape[1]))
    img_int[:, :] = img_patch_org[:, :]

    # Centre the patch on a square canvas so rotations keep it in frame.
    side = int(np.max(img_int.shape) * 1.1)
    img_resized = np.zeros((side, side))
    onset_x = int((img_resized.shape[1] - img_int.shape[1]) / 2.0)
    onset_y = int((img_resized.shape[0] - img_int.shape[0]) / 2.0)
    img_resized[onset_y:onset_y + img_int.shape[0], onset_x:onset_x + img_int.shape[1]] = img_int[:, :]

    if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
        coarse, _ = _best_deskew_angle(img_resized, np.array([-45, 0, 45, 90]), sigma_des)
        ang_int, _ = _best_deskew_angle(
            img_resized, np.linspace(coarse - 22.5, coarse + 22.5, 100), sigma_des
        )
        return ang_int

    if main_page:
        near, steps, early_slope_edge = 12, 100, 11
    else:
        near, steps, early_slope_edge = 25, 60, 22

    angels = np.linspace(-near, near, steps)
    ang_int, var_res = _best_deskew_angle(img_resized, angels, sigma_des)

    if main_page and dir_of_all is not None:
        best_idx = np.argmax(var_res)
        fig = plt.figure(figsize=(60, 30))
        plt.rcParams['font.size'] = '50'
        plt.plot(angels, np.array(var_res), '-o', markersize=25, linewidth=4)
        plt.xlabel('angle', fontsize=50)
        plt.ylabel('variance of sum of rotated textline in direction of x axis', fontsize=50)
        plt.plot(angels[best_idx], var_res[best_idx], '*', markersize=50,
                 label='Angle of deskewing=' + "{:.2f}".format(angels[best_idx]) + r'$\degree$')
        plt.legend(loc='best')
        plt.savefig(os.path.join(dir_of_all, f_name + '_rotation_angle.png'))
        plt.close(fig)

    # A winner near the edge of the narrow range means the true angle may
    # lie outside it: search the remaining range on the same side.
    if abs(ang_int) > early_slope_edge:
        sign = 1 if ang_int > 0 else -1
        ang_int, _ = _best_deskew_angle(
            img_resized, np.linspace(sign * 90, sign * near, steps), sigma_des
        )

    return ang_int