From 63fe7ac763f3e2b6e6914089eb45a93f40b52689 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 24 Nov 2020 12:43:40 +0100 Subject: [PATCH] more extraction of util/unused functions --- sbb_newspapers_org_image/eynollah.py | 142 ++++----------------------- sbb_newspapers_org_image/unused.py | 6 ++ sbb_newspapers_org_image/utils.py | 97 ++++++++++++++++++ 3 files changed, 123 insertions(+), 122 deletions(-) diff --git a/sbb_newspapers_org_image/eynollah.py b/sbb_newspapers_org_image/eynollah.py index 8748dd6..2e5ae40 100644 --- a/sbb_newspapers_org_image/eynollah.py +++ b/sbb_newspapers_org_image/eynollah.py @@ -60,6 +60,7 @@ from .utils import ( seperate_lines, seperate_lines_new_inside_teils2, filter_small_drop_capitals_from_no_patch_layout, + find_num_col_deskew, ) @@ -1542,12 +1543,6 @@ class eynollah: return main_contours - def get_all_image_patches_coordination(self, image_page): - self.all_box_coord = [] - for jk in range(len(self.boxes)): - _, crop_coor = crop_image_inside_box(self.boxes[jk], image_page) - self.all_box_coord.append(crop_coor) - def textline_contours(self, img, patches, scaler_h, scaler_w): if patches: @@ -2341,103 +2336,6 @@ class eynollah: return interest_neg_fin - def find_num_col_deskew(self, regions_without_seperators, sigma_, multiplier=3.8): - regions_without_seperators_0 = regions_without_seperators[:, :].sum(axis=1) - - meda_n_updown = regions_without_seperators_0[len(regions_without_seperators_0) :: -1] - - first_nonzero = next((i for i, x in enumerate(regions_without_seperators_0) if x), 0) - last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0) - - last_nonzero = len(regions_without_seperators_0) - last_nonzero - - y = regions_without_seperators_0 # [first_nonzero:last_nonzero] - - y_help = np.zeros(len(y) + 20) - - y_help[10 : len(y) + 10] = y - - x = np.array(range(len(y))) - - zneg_rev = -y_help + np.max(y_help) - - zneg = np.zeros(len(zneg_rev) + 20) - - zneg[10 : len(zneg_rev) + 10] = zneg_rev - - z = gaussian_filter1d(y, sigma_) - zneg = gaussian_filter1d(zneg, sigma_) - - peaks_neg, _ = find_peaks(zneg, height=0) - peaks, _ = find_peaks(z, height=0) - - peaks_neg = peaks_neg - 10 - 10 - - # print(np.std(z),'np.std(z)np.std(z)np.std(z)') - - ##plt.plot(z) - ##plt.show() - - ##plt.imshow(regions_without_seperators) - ##plt.show() - """ - last_nonzero=last_nonzero-0#100 - first_nonzero=first_nonzero+0#+100 - - peaks_neg=peaks_neg[(peaks_neg>first_nonzero) & (peaks_neg.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])] - """ - interest_pos = z[peaks] - - interest_pos = interest_pos[interest_pos > 10] - - interest_neg = z[peaks_neg] - - min_peaks_pos = np.mean(interest_pos) - min_peaks_neg = 0 # np.min(interest_neg) - - dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier - # print(interest_pos) - grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 - - interest_neg_fin = interest_neg[(interest_neg < grenze)] - peaks_neg_fin = peaks_neg[(interest_neg < grenze)] - interest_neg_fin = interest_neg[(interest_neg < grenze)] - - """ - if interest_neg[0]<0.1: - interest_neg=interest_neg[1:] - if interest_neg[len(interest_neg)-1]<0.1: - interest_neg=interest_neg[:len(interest_neg)-1] - - - - min_peaks_pos=np.min(interest_pos) - min_peaks_neg=0#np.min(interest_neg) - - - dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier - grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 - """ - # interest_neg_fin=interest_neg#[(interest_negfirst_nonzero) & (peaks_neg.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])] + """ + interest_pos = z[peaks] + + interest_pos = interest_pos[interest_pos > 10] + + interest_neg = z[peaks_neg] + + min_peaks_pos = np.mean(interest_pos) + min_peaks_neg = 0 # np.min(interest_neg) + + dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier + # print(interest_pos) + grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 + + interest_neg_fin = interest_neg[(interest_neg < grenze)] + peaks_neg_fin = peaks_neg[(interest_neg < grenze)] + interest_neg_fin = interest_neg[(interest_neg < grenze)] + + """ + if interest_neg[0]<0.1: + interest_neg=interest_neg[1:] + if interest_neg[len(interest_neg)-1]<0.1: + interest_neg=interest_neg[:len(interest_neg)-1] + + + + min_peaks_pos=np.min(interest_pos) + min_peaks_neg=0#np.min(interest_neg) + + + dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier + grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 + """ + # interest_neg_fin=interest_neg#[(interest_neg