more extraction of util/unused functions

pull/8/head
Konstantin Baierer 4 years ago
parent 3f6ea176e7
commit 63fe7ac763

@ -60,6 +60,7 @@ from .utils import (
seperate_lines, seperate_lines,
seperate_lines_new_inside_teils2, seperate_lines_new_inside_teils2,
filter_small_drop_capitals_from_no_patch_layout, filter_small_drop_capitals_from_no_patch_layout,
find_num_col_deskew,
) )
@ -1542,12 +1543,6 @@ class eynollah:
return main_contours return main_contours
def get_all_image_patches_coordination(self, image_page):
    """Store the crop coordinates of every box in ``self.boxes``.

    For each entry of ``self.boxes`` the helper ``crop_image_inside_box``
    is called with that box and ``image_page``; the second value it returns
    is kept. The resulting list is written to ``self.all_box_coord``.

    Args:
        image_page: Page image forwarded unchanged to
            ``crop_image_inside_box``.

    Returns:
        None. The result is stored on ``self.all_box_coord``.
    """
    # Only the coordinates (second return value) are needed; the cropped
    # image itself is discarded.
    self.all_box_coord = [
        crop_image_inside_box(box, image_page)[1] for box in self.boxes
    ]
def textline_contours(self, img, patches, scaler_h, scaler_w): def textline_contours(self, img, patches, scaler_h, scaler_w):
if patches: if patches:
@ -2341,103 +2336,6 @@ class eynollah:
return interest_neg_fin return interest_neg_fin
def find_num_col_deskew(self, regions_without_seperators, sigma_, multiplier=3.8):
    """Return the low valleys of the smoothed row-sum profile and its spread.

    The 2-D input is summed along axis 1, the resulting 1-D profile is
    smoothed with ``gaussian_filter1d`` and its minima are located by
    running ``find_peaks`` on a padded, inverted copy of the profile.

    Args:
        regions_without_seperators: 2-D array; it is summed along axis 1.
        sigma_: Standard deviation passed to ``gaussian_filter1d``.
        multiplier: Controls the valley threshold. A valley is kept when its
            smoothed value is below ``m - m / multiplier``, where ``m`` is
            the mean of the smoothed maxima that exceed 10.

    Returns:
        A tuple ``(interest_neg_fin, std)``: the smoothed profile values at
        the retained valleys, and ``np.std`` of the smoothed profile.

    Raises:
        IndexError: If a shifted valley index falls past the end of the
            profile (see the review note in the body).
    """
    pad = 10  # zero padding added on each side, applied twice below

    profile = regions_without_seperators[:, :].sum(axis=1)
    length = len(profile)

    # First padding: embed the profile in a float buffer of zeros.
    padded = np.zeros(length + 2 * pad)
    padded[pad:length + pad] = profile

    # Invert so that minima of the profile become maxima, then pad again.
    inverted = -padded + np.max(padded)
    zneg = np.zeros(len(inverted) + 2 * pad)
    zneg[pad:len(inverted) + pad] = inverted

    z = gaussian_filter1d(profile, sigma_)
    zneg = gaussian_filter1d(zneg, sigma_)

    peaks_neg, _ = find_peaks(zneg, height=0)
    peaks, _ = find_peaks(z, height=0)

    # Undo both paddings so the indices refer to the unpadded profile.
    # NOTE(review): the shifted indices can be negative (NumPy then wraps
    # around to the end of ``z``) or >= len(z) (IndexError on the lookup
    # below). This is kept as-is to preserve existing results.
    peaks_neg = peaks_neg - 2 * pad

    interest_pos = z[peaks]
    interest_pos = interest_pos[interest_pos > 10]
    interest_neg = z[peaks_neg]

    # With no maxima above 10 the mean is NaN and no valley passes the test.
    min_peaks_pos = np.mean(interest_pos)
    grenze = min_peaks_pos - min_peaks_pos / multiplier

    interest_neg_fin = interest_neg[interest_neg < grenze]
    return interest_neg_fin, np.std(z)
def return_deskew_slop(self, img_patch_org, sigma_des, main_page=False): def return_deskew_slop(self, img_patch_org, sigma_des, main_page=False):
if main_page and self.dir_of_all is not None: if main_page and self.dir_of_all is not None:
@ -2491,12 +2389,12 @@ class eynollah:
# plt.imshow(img_rot) # plt.imshow(img_rot)
# plt.show() # plt.show()
img_rot[img_rot != 0] = 1 img_rot[img_rot != 0] = 1
# res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) # res_me=np.mean(find_num_col_deskew(img_rot,sigma_des,2.0 ))
# neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) # neg_peaks,var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
# print(var_spectrum,'var_spectrum') # print(var_spectrum,'var_spectrum')
try: try:
neg_peaks, var_spectrum = self.find_num_col_deskew(img_rot, sigma_des, 20.3) neg_peaks, var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
# print(rot,var_spectrum,'var_spectrum') # print(rot,var_spectrum,'var_spectrum')
res_me = np.mean(neg_peaks) res_me = np.mean(neg_peaks)
if res_me == 0: if res_me == 0:
@ -2538,9 +2436,9 @@ class eynollah:
##plt.imshow(img_rot) ##plt.imshow(img_rot)
##plt.show() ##plt.show()
img_rot[img_rot != 0] = 1 img_rot[img_rot != 0] = 1
# res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) # res_me=np.mean(find_num_col_deskew(img_rot,sigma_des,2.0 ))
try: try:
neg_peaks, var_spectrum = self.find_num_col_deskew(img_rot, sigma_des, 20.3) neg_peaks, var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
# print(indexer,'indexer') # print(indexer,'indexer')
res_me = np.mean(neg_peaks) res_me = np.mean(neg_peaks)
if res_me == 0: if res_me == 0:
@ -2586,12 +2484,12 @@ class eynollah:
# plt.imshow(img_rot) # plt.imshow(img_rot)
# plt.show() # plt.show()
img_rot[img_rot != 0] = 1 img_rot[img_rot != 0] = 1
# res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) # res_me=np.mean(find_num_col_deskew(img_rot,sigma_des,2.0 ))
# neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) # neg_peaks,var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
# print(var_spectrum,'var_spectrum') # print(var_spectrum,'var_spectrum')
try: try:
neg_peaks, var_spectrum = self.find_num_col_deskew(img_rot, sigma_des, 20.3) neg_peaks, var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
# print(rot,var_spectrum,'var_spectrum') # print(rot,var_spectrum,'var_spectrum')
res_me = np.mean(neg_peaks) res_me = np.mean(neg_peaks)
if res_me == 0: if res_me == 0:
@ -2648,9 +2546,9 @@ class eynollah:
##plt.imshow(img_rot) ##plt.imshow(img_rot)
##plt.show() ##plt.show()
img_rot[img_rot != 0] = 1 img_rot[img_rot != 0] = 1
# res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) # res_me=np.mean(find_num_col_deskew(img_rot,sigma_des,2.0 ))
try: try:
neg_peaks, var_spectrum = self.find_num_col_deskew(img_rot, sigma_des, 20.3) neg_peaks, var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
# print(indexer,'indexer') # print(indexer,'indexer')
res_me = np.mean(neg_peaks) res_me = np.mean(neg_peaks)
if res_me == 0: if res_me == 0:
@ -2694,9 +2592,9 @@ class eynollah:
##plt.imshow(img_rot) ##plt.imshow(img_rot)
##plt.show() ##plt.show()
img_rot[img_rot != 0] = 1 img_rot[img_rot != 0] = 1
# res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) # res_me=np.mean(find_num_col_deskew(img_rot,sigma_des,2.0 ))
try: try:
neg_peaks, var_spectrum = self.find_num_col_deskew(img_rot, sigma_des, 20.3) neg_peaks, var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
# print(indexer,'indexer') # print(indexer,'indexer')
res_me = np.mean(neg_peaks) res_me = np.mean(neg_peaks)
if res_me == 0: if res_me == 0:
@ -2739,12 +2637,12 @@ class eynollah:
# plt.imshow(img_rot) # plt.imshow(img_rot)
# plt.show() # plt.show()
img_rot[img_rot != 0] = 1 img_rot[img_rot != 0] = 1
# res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) # res_me=np.mean(find_num_col_deskew(img_rot,sigma_des,2.0 ))
# neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) # neg_peaks,var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
# print(var_spectrum,'var_spectrum') # print(var_spectrum,'var_spectrum')
try: try:
neg_peaks, var_spectrum = self.find_num_col_deskew(img_rot, sigma_des, 20.3) neg_peaks, var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
# print(rot,var_spectrum,'var_spectrum') # print(rot,var_spectrum,'var_spectrum')
res_me = np.mean(neg_peaks) res_me = np.mean(neg_peaks)
if res_me == 0: if res_me == 0:
@ -2791,9 +2689,9 @@ class eynollah:
##plt.imshow(img_rot) ##plt.imshow(img_rot)
##plt.show() ##plt.show()
img_rot[img_rot != 0] = 1 img_rot[img_rot != 0] = 1
# res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) # res_me=np.mean(find_num_col_deskew(img_rot,sigma_des,2.0 ))
try: try:
neg_peaks, var_spectrum = self.find_num_col_deskew(img_rot, sigma_des, 20.3) neg_peaks, var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
# print(indexer,'indexer') # print(indexer,'indexer')
res_me = np.mean(neg_peaks) res_me = np.mean(neg_peaks)
if res_me == 0: if res_me == 0:
@ -2837,9 +2735,9 @@ class eynollah:
##plt.imshow(img_rot) ##plt.imshow(img_rot)
##plt.show() ##plt.show()
img_rot[img_rot != 0] = 1 img_rot[img_rot != 0] = 1
# res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 )) # res_me=np.mean(find_num_col_deskew(img_rot,sigma_des,2.0 ))
try: try:
neg_peaks, var_spectrum = self.find_num_col_deskew(img_rot, sigma_des, 20.3) neg_peaks, var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
# print(indexer,'indexer') # print(indexer,'indexer')
res_me = np.mean(neg_peaks) res_me = np.mean(neg_peaks)
if res_me == 0: if res_me == 0:

@ -272,3 +272,9 @@ def deskew_erarly(textline_mask):
image_regions_deskewd = image_regions_deskewd + deskewed_new image_regions_deskewd = image_regions_deskewd + deskewed_new
return image_regions_deskewd, slope_true return image_regions_deskewd, slope_true
def get_all_image_patches_coordination(self, image_page):
    """Store the crop coordinates of every box in ``self.boxes``.

    For each entry of ``self.boxes`` the helper ``crop_image_inside_box``
    is called with that box and ``image_page``; the second value it returns
    is kept. The resulting list is written to ``self.all_box_coord``.

    NOTE(review): this still takes ``self`` although it appears to have been
    moved out of the class -- confirm whether it is meant to be called with
    an object exposing ``boxes``.

    Args:
        image_page: Page image forwarded unchanged to
            ``crop_image_inside_box``.

    Returns:
        None. The result is stored on ``self.all_box_coord``.
    """
    # Only the coordinates (second return value) are needed; the cropped
    # image itself is discarded.
    self.all_box_coord = [
        crop_image_inside_box(box, image_page)[1] for box in self.boxes
    ]

@ -1457,3 +1457,100 @@ def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
return layout_no_patch return layout_no_patch
def find_num_col_deskew(regions_without_seperators, sigma_, multiplier=3.8):
    """Return the low valleys of the smoothed row-sum profile and its spread.

    The 2-D input is summed along axis 1, the resulting 1-D profile is
    smoothed with ``gaussian_filter1d`` and its minima are located by
    running ``find_peaks`` on a padded, inverted copy of the profile.

    Args:
        regions_without_seperators: 2-D array; it is summed along axis 1.
        sigma_: Standard deviation passed to ``gaussian_filter1d``.
        multiplier: Controls the valley threshold. A valley is kept when its
            smoothed value is below ``m - m / multiplier``, where ``m`` is
            the mean of the smoothed maxima that exceed 10.

    Returns:
        A tuple ``(interest_neg_fin, std)``: the smoothed profile values at
        the retained valleys, and ``np.std`` of the smoothed profile.

    Raises:
        IndexError: If a shifted valley index falls past the end of the
            profile (see the review note in the body).
    """
    pad = 10  # zero padding added on each side, applied twice below

    profile = regions_without_seperators[:, :].sum(axis=1)
    length = len(profile)

    # First padding: embed the profile in a float buffer of zeros.
    padded = np.zeros(length + 2 * pad)
    padded[pad:length + pad] = profile

    # Invert so that minima of the profile become maxima, then pad again.
    inverted = -padded + np.max(padded)
    zneg = np.zeros(len(inverted) + 2 * pad)
    zneg[pad:len(inverted) + pad] = inverted

    z = gaussian_filter1d(profile, sigma_)
    zneg = gaussian_filter1d(zneg, sigma_)

    peaks_neg, _ = find_peaks(zneg, height=0)
    peaks, _ = find_peaks(z, height=0)

    # Undo both paddings so the indices refer to the unpadded profile.
    # NOTE(review): the shifted indices can be negative (NumPy then wraps
    # around to the end of ``z``) or >= len(z) (IndexError on the lookup
    # below). This is kept as-is to preserve existing results.
    peaks_neg = peaks_neg - 2 * pad

    interest_pos = z[peaks]
    interest_pos = interest_pos[interest_pos > 10]
    interest_neg = z[peaks_neg]

    # With no maxima above 10 the mean is NaN and no valley passes the test.
    min_peaks_pos = np.mean(interest_pos)
    grenze = min_peaks_pos - min_peaks_pos / multiplier

    interest_neg_fin = interest_neg[interest_neg < grenze]
    return interest_neg_fin, np.std(z)

Loading…
Cancel
Save