Update main.py - robust deskewing and better page extraction

pull/1/head
Rezanezhad, Vahid 5 years ago
parent 0182b7087f
commit a11f6740cb

@ -51,7 +51,7 @@ class textlineerkenner:
self.f_name = self.f_name.split('.')[0] self.f_name = self.f_name.split('.')[0]
self.dir_models = dir_models self.dir_models = dir_models
self.kernel = np.ones((5, 5), np.uint8) self.kernel = np.ones((5, 5), np.uint8)
self.model_page_dir = dir_models + '/model_page.h5' self.model_page_dir = dir_models + '/model_page_new.h5'
self.model_region_dir = dir_models + '/model_strukturerkennung.h5' self.model_region_dir = dir_models + '/model_strukturerkennung.h5'
self.model_textline_dir = dir_models + '/model_textline.h5' self.model_textline_dir = dir_models + '/model_textline.h5'
@ -199,20 +199,32 @@ class textlineerkenner:
self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
elif self.image.shape[0] < 2000 and self.image.shape[0] >= 1000: elif self.image.shape[0] < 2000 and self.image.shape[0] >= 1000:
self.img_hight_int = 3500 self.img_hight_int = int(self.image.shape[0]*1.1)
self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
elif self.image.shape[0] < 3000 and self.image.shape[0] >= 2000: elif self.image.shape[0] < 3300 and self.image.shape[0] >= 2000:
self.img_hight_int = 5500 self.img_hight_int = int(self.image.shape[0]*1.1)
self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3000: elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3300 and self.image.shape[1]<2400 :
self.img_hight_int = int(self.image.shape[0]*1.1)# 6500
self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3300 and self.image.shape[1]>=2400 :
self.img_hight_int = 6500 self.img_hight_int = 6500
self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
elif self.image.shape[0] < 5400 and self.image.shape[0] > 4000 and self.image.shape[1]>3300 :
self.img_hight_int = int(self.image.shape[0]*1.6)# 6500
self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
elif self.image.shape[0] < 11000 and self.image.shape[0] >= 7000 :
self.img_hight_int = int(self.image.shape[0]*1.6)# 6500
self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
else: else:
self.img_hight_int = self.image.shape[0] self.img_hight_int = int(self.image.shape[0]*1.1)# 6500
self.img_width_int = self.image.shape[1] self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
#self.img_hight_int = self.image.shape[0]
#self.img_width_int = self.image.shape[1]
self.scale_y = self.img_hight_int / float(self.image.shape[0]) self.scale_y = self.img_hight_int / float(self.image.shape[0])
self.scale_x = self.img_width_int / float(self.image.shape[1]) self.scale_x = self.img_width_int / float(self.image.shape[1])
@ -391,8 +403,8 @@ class textlineerkenner:
patches=False patches=False
model_page, session_page = self.start_new_session_and_model(self.model_page_dir) model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
img = self.otsu_copy(self.image) img = self.otsu_copy(self.image)
for ii in range(1): #for ii in range(1):
img = cv2.GaussianBlur(img, (15, 15), 0) # img = cv2.GaussianBlur(img, (15, 15), 0)
img_page_prediction=self.do_prediction(patches,img,model_page) img_page_prediction=self.do_prediction(patches,img,model_page)
@ -400,7 +412,7 @@ class textlineerkenner:
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0) _, thresh = cv2.threshold(imgray, 0, 255, 0)
thresh = cv2.dilate(thresh, self.kernel, iterations=3) thresh = cv2.dilate(thresh, self.kernel, iterations=6)
contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
@ -409,10 +421,22 @@ class textlineerkenner:
x, y, w, h = cv2.boundingRect(cnt) x, y, w, h = cv2.boundingRect(cnt)
try:
box = [x, y, w, h] box = [x, y, w, h]
croped_page, page_coord = self.crop_image_inside_box(box, self.image) croped_page, page_coord = self.crop_image_inside_box(box, self.image)
self.cont_page=[]
self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] ,
[ page_coord[3] , page_coord[0] ] ,
[ page_coord[3] , page_coord[1] ] ,
[ page_coord[2] , page_coord[1] ]] ) )
except:
box = [0, 0, self.image.shape[1]-1, self.image.shape[0]-1]
croped_page, page_coord = self.crop_image_inside_box(box, self.image)
self.cont_page=[] self.cont_page=[]
self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] ,
[ page_coord[3] , page_coord[0] ] , [ page_coord[3] , page_coord[0] ] ,
@ -437,8 +461,10 @@ class textlineerkenner:
img = self.otsu_copy(img) img = self.otsu_copy(img)
img = img.astype(np.uint8) img = img.astype(np.uint8)
prediction_regions=self.do_prediction(patches,img,model_region) prediction_regions=self.do_prediction(patches,img,model_region)
session_region.close() session_region.close()
del model_region del model_region
del session_region del session_region
@ -505,8 +531,8 @@ class textlineerkenner:
self.all_text_region_raw.append(crop_img[:, :, 0]) self.all_text_region_raw.append(crop_img[:, :, 0])
self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1]) self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1])
def seperate_lines(self, img_path, contour_text_interest, thetha): def seperate_lines(self, img_patch, contour_text_interest, thetha):
(h, w) = img_path.shape[:2] (h, w) = img_patch.shape[:2]
center = (w // 2, h // 2) center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, -thetha, 1.0) M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
x_d = M[0, 2] x_d = M[0, 2]
@ -522,19 +548,19 @@ class textlineerkenner:
y_cont = y_cont - np.min(y_cont) y_cont = y_cont - np.min(y_cont)
x_min_cont = 0 x_min_cont = 0
x_max_cont = img_path.shape[1] x_max_cont = img_patch.shape[1]
y_min_cont = 0 y_min_cont = 0
y_max_cont = img_path.shape[0] y_max_cont = img_patch.shape[0]
xv = np.linspace(x_min_cont, x_max_cont, 1000) xv = np.linspace(x_min_cont, x_max_cont, 1000)
mada_n = img_path.sum(axis=1) textline_patch_sum_along_width = img_patch.sum(axis=1)
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
y = mada_n[:] # [first_nonzero:last_nonzero] y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero]
y_help = np.zeros(len(y) + 40) y_padded = np.zeros(len(y) + 40)
y_help[20:len(y) + 20] = y y_padded[20:len(y) + 20] = y
x = np.array(range(len(y))) x = np.array(range(len(y)))
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
@ -544,14 +570,20 @@ class textlineerkenner:
sigma_gaus=8 sigma_gaus=8
z= gaussian_filter1d(y_help, sigma_gaus) y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus)
zneg_rev=-y_help+np.max(y_help) y_padded_up_to_down=-y_padded+np.max(y_padded)
zneg=np.zeros(len(zneg_rev)+40) y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40)
zneg[20:len(zneg_rev)+20]=zneg_rev y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down
zneg= gaussian_filter1d(zneg, sigma_gaus) y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
peaks, _ = find_peaks(y_padded_smoothed, height=0)
peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
mean_value_of_peaks=np.mean(y_padded_smoothed[peaks])
std_value_of_peaks=np.std(y_padded_smoothed[peaks])
peaks_values=y_padded_smoothed[peaks]
peaks, _ = find_peaks(z, height=0)
peaks_neg, _ = find_peaks(zneg, height=0)
peaks_neg = peaks_neg - 20 - 20 peaks_neg = peaks_neg - 20 - 20
peaks = peaks - 20 peaks = peaks - 20
@ -568,21 +600,40 @@ class textlineerkenner:
textline_boxes_rot = [] textline_boxes_rot = []
if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3: if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3:
#print('11')
for jj in range(len(peaks)): for jj in range(len(peaks)):
if jj==(len(peaks)-1):
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int(
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
else:
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
else:
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
point_down_narrow = peaks[jj] + first_nonzero + int( point_down_narrow = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
if point_down >= img_path.shape[0]:
point_down = img_path.shape[0] - 2
if point_down_narrow >= img_path.shape[0]:
point_down_narrow = img_path.shape[0] - 2 if point_down_narrow >= img_patch.shape[0]:
point_down_narrow = img_patch.shape[0] - 2
distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
for mj in range(len(xv))] for mj in range(len(xv))]
@ -672,15 +723,15 @@ class textlineerkenner:
dis_to_next = np.abs(peaks[1] - peaks[0]) dis_to_next = np.abs(peaks[1] - peaks[0])
for jj in range(len(peaks)): for jj in range(len(peaks)):
if jj == 0: if jj == 0:
point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) point_up = 0#peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next)
if point_up < 0: if point_up < 0:
point_up = 1 point_up = 1
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
elif jj == 1: elif jj == 1:
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
if point_down >= img_path.shape[0]: if point_down >= img_patch.shape[0]:
point_down = img_path.shape[0] - 2 point_down = img_patch.shape[0] - 2
point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) point_up = peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
for mj in range(len(xv))] for mj in range(len(xv))]
@ -692,8 +743,8 @@ class textlineerkenner:
x_min = x_min_cont x_min = x_min_cont
x_max = x_max_cont x_max = x_max_cont
else: else:
x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) x_min = np.min(xvinside)
x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) x_max = np.max(xvinside)
p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)])
p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)])
@ -737,9 +788,9 @@ class textlineerkenner:
elif jj == len(peaks) - 1: elif jj == len(peaks) - 1:
dis_to_next = peaks[jj] - peaks[jj - 1] dis_to_next = peaks[jj] - peaks[jj - 1]
# point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next)
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) point_down = peaks[jj] + first_nonzero + int(1. / 1.7 * dis_to_next)
if point_down >= img_path.shape[0]: if point_down >= img_patch.shape[0]:
point_down = img_path.shape[0] - 2 point_down = img_patch.shape[0] - 2
# point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next)
point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
else: else:
@ -858,65 +909,7 @@ class textlineerkenner:
return contours_rotated_clean return contours_rotated_clean
def textline_contours_to_get_slope_correctly(self, textline_mask, img_patch, contour_interest):
slope_new = 0 # deskew_images(img_patch)
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
textline_mask = textline_mask.astype(np.uint8)
textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, self.kernel)
textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, self.kernel)
textline_mask = cv2.erode(textline_mask, self.kernel, iterations=1)
imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, self.kernel)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, self.kernel)
contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003)
textline_maskt = textline_mask[:, :, 0]
textline_maskt[textline_maskt != 0] = 1
peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new)
mean_dis = np.mean(np.diff(peaks_point))
len_x = thresh.shape[1]
slope_lines = []
contours_slope_new = []
for kk in range(len(main_contours)):
xminh = np.min(main_contours[kk][:, 0])
xmaxh = np.max(main_contours[kk][:, 0])
yminh = np.min(main_contours[kk][:, 1])
ymaxh = np.max(main_contours[kk][:, 1])
if ymaxh - yminh <= mean_dis and (
xmaxh - xminh) >= 0.3 * len_x: # xminh>=0.05*len_x and xminh<=0.4*len_x and xmaxh<=0.95*len_x and xmaxh>=0.6*len_x:
contours_slope_new.append(main_contours[kk])
rows, cols = thresh.shape[:2]
[vx, vy, x, y] = cv2.fitLine(main_contours[kk], cv2.DIST_L2, 0, 0.01, 0.01)
slope_lines.append((vy / vx) / np.pi * 180)
if len(slope_lines) >= 2:
slope = np.mean(slope_lines) # slope_true/np.pi*180
else:
slope = 999
else:
slope = 0
return slope
def return_contours_of_image(self,image_box_tabels_1): def return_contours_of_image(self,image_box_tabels_1):
image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2)
@ -935,18 +928,18 @@ class textlineerkenner:
def isNaN(self,num): def isNaN(self,num):
return num != num return num != num
def find_num_col(self,regions_without_seperators,sigma_,multiplier=3.8 ): def get_standard_deviation_of_summed_textline_patch_along_width(self,img_patch,sigma_,multiplier=3.8 ):
regions_without_seperators_0=regions_without_seperators[:,:].sum(axis=1) img_patch_sum_along_width=img_patch[:,:].sum(axis=1)
meda_n_updown=regions_without_seperators_0[len(regions_without_seperators_0)::-1] img_patch_sum_along_width_updown=img_patch_sum_along_width[len(img_patch_sum_along_width)::-1]
first_nonzero=(next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)) first_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width) if x), 0))
last_nonzero=(next((i for i, x in enumerate(meda_n_updown) if x), 0)) last_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width_updown) if x), 0))
last_nonzero=len(regions_without_seperators_0)-last_nonzero last_nonzero=len(img_patch_sum_along_width)-last_nonzero
y=regions_without_seperators_0#[first_nonzero:last_nonzero] y=img_patch_sum_along_width#[first_nonzero:last_nonzero]
y_help=np.zeros(len(y)+20) y_help=np.zeros(len(y)+20)
@ -972,151 +965,126 @@ class textlineerkenner:
peaks_neg=peaks_neg-10-10 peaks_neg=peaks_neg-10-10
last_nonzero=last_nonzero-0#100
first_nonzero=first_nonzero+0#+100
peaks_neg=peaks_neg[(peaks_neg>first_nonzero) & (peaks_neg<last_nonzero)]
peaks=peaks[(peaks>.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])]
interest_pos=z[peaks] interest_pos=z[peaks]
interest_pos=interest_pos[interest_pos>10] interest_pos=interest_pos[interest_pos>10]
interest_neg=z[peaks_neg] interest_neg=z[peaks_neg]
min_peaks_pos=np.mean(interest_pos)
if interest_neg[0]<0.1:
interest_neg=interest_neg[1:]
if interest_neg[len(interest_neg)-1]<0.1:
interest_neg=interest_neg[:len(interest_neg)-1]
min_peaks_pos=np.min(interest_pos)
min_peaks_neg=0#np.min(interest_neg) min_peaks_neg=0#np.min(interest_neg)
dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier
#print(interest_pos)
grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
interest_neg_fin=interest_neg#[(interest_neg<grenze)] interest_neg_fin=interest_neg[(interest_neg<grenze)]
peaks_neg_fin=peaks_neg#[(interest_neg<grenze)] peaks_neg_fin=peaks_neg[(interest_neg<grenze)]
interest_neg_fin=interest_neg#[(interest_neg<grenze)] interest_neg_fin=interest_neg[(interest_neg<grenze)]
num_col=(len(interest_neg_fin))+1
p_l=0
p_u=len(y)-1
p_m=int(len(y)/2.)
p_g_l=int(len(y)/3.)
p_g_u=len(y)-int(len(y)/3.)
return interest_neg_fin,np.std(z)
diff_peaks=np.abs( np.diff(peaks_neg_fin) ) def return_deskew_slope(self,img_patch,sigma_des):
diff_peaks_annormal=diff_peaks[diff_peaks<30] img_patch_copy=np.zeros((img_patch.shape[0],img_patch.shape[1]))
img_patch_copy[:,:]=img_patch[:,:]#img_patch_org[:,:,0]
img_patch_padded=np.zeros((int( img_patch_copy.shape[0]*(1.2) ) , int( img_patch_copy.shape[1]*(2.6) ) ))
return interest_neg_fin img_patch_padded[ int( img_patch_copy.shape[0]*(.1)):int( img_patch_copy.shape[0]*(.1))+img_patch_copy.shape[0] , int( img_patch_copy.shape[1]*(.8)):int( img_patch_copy.shape[1]*(.8))+img_patch_copy.shape[1] ]=img_patch_copy[:,:]
def return_deskew_slop(self,img_patch_org,sigma_des): angles=np.linspace(-12,12,40)
img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
img_resized=np.zeros((int( img_int.shape[0]*(1.2) ) , int( img_int.shape[1]*(1.2) ) ))
img_resized[ int( img_int.shape[0]*(.1)):int( img_int.shape[0]*(.1))+img_int.shape[0] , int( img_int.shape[1]*(.1)):int( img_int.shape[1]*(.1))+img_int.shape[1] ]=img_int[:,:]
angels=np.linspace(-4,4,60)
res=[] res=[]
num_of_peaks=[]
index_cor=[] index_cor=[]
var_res=[]
indexer=0 indexer=0
for rot in angels: for rot in angles:
img_rot=self.rotate_image(img_resized,rot) img_rotated=self.rotate_image(img_patch_padded,rot)
img_rot[img_rot!=0]=1 img_rotated[img_rotated!=0]=1
res_me=np.mean(self.find_num_col(img_rot,sigma_des,200.3 )) try:
neg_peaks,var_spectrum=self.get_standard_deviation_of_summed_textline_patch_along_width(img_rotated,sigma_des,20.3 )
res_me=np.mean(neg_peaks)
if res_me==0:
res_me=1000000000000000000000
else:
pass
res_num=len(neg_peaks)
except:
res_me=1000000000000000000000
res_num=0
var_spectrum=0
if self.isNaN(res_me): if self.isNaN(res_me):
pass pass
else: else:
res.append( res_me ) res.append( res_me )
var_res.append(var_spectrum)
num_of_peaks.append( res_num )
index_cor.append(indexer) index_cor.append(indexer)
indexer=indexer+1 indexer=indexer+1
res=np.array(res) try:
arg_int=np.argmin(res) var_res=np.array(var_res)
arg_fin=index_cor[arg_int]
ang_int=angels[arg_fin]
img_rot=self.rotate_image(img_resized,ang_int) ang_int=angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
img_rot[img_rot!=0]=1 except:
ang_int=0
return ang_int return ang_int
def do_work_of_slopes(self,q,poly,box_sub,boxes_per_process,contours_sub,textline_mask_tot,contours_per_process): def do_work_of_slopes(self,queue_of_slopes_per_textregion,queue_of_textlines_rectangle_per_textregion
slope_biggest=0 ,queue_of_textregion_box,boxes_per_process,queue_of_quntours_of_textregion,textline_mask_tot,contours_per_process):
slopes_sub = []
boxes_sub_new=[] slopes_per_each_subprocess = []
poly_sub=[] bounding_box_of_textregion_per_each_subprocess=[]
contours_sub_per_p=[] textlines_rectangles_per_each_subprocess=[]
contours_textregion_per_each_subprocess=[]
for mv in range(len(boxes_per_process)): for mv in range(len(boxes_per_process)):
contours_sub_per_p.append(contours_per_process[mv]) contours_textregion_per_each_subprocess.append(contours_per_process[mv])
crop_img, _ = self.crop_image_inside_box(boxes_per_process[mv], crop_img, _ = self.crop_image_inside_box(boxes_per_process[mv],
np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
crop_img=crop_img[:,:,0] crop_img=crop_img[:,:,0]
crop_img=cv2.erode(crop_img,self.kernel,iterations = 2) crop_img=cv2.erode(crop_img,self.kernel,iterations = 2)
try: try:
textline_con,hierachy=self.return_contours_of_image(crop_img) sigma_des=2
textline_con_fil=self.filter_contours_area_of_image(crop_img,textline_con,hierachy,max_area=1,min_area=0.0008) slope_corresponding_textregion=self.return_deskew_slope(crop_img,sigma_des)
y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil)
sigma_des=int( y_diff_mean * (4./40.0) )
if sigma_des<1:
sigma_des=1
crop_img[crop_img>0]=1
slope_corresponding_textregion=self.return_deskew_slop(crop_img,sigma_des)
except: except:
slope_corresponding_textregion=999 slope_corresponding_textregion=999
if np.abs(slope_corresponding_textregion)>12.5 and slope_corresponding_textregion!=999: if np.abs(slope_corresponding_textregion)>12.5 and slope_corresponding_textregion!=999:
slope_corresponding_textregion=slope_biggest slope_corresponding_textregion=0
elif slope_corresponding_textregion==999: elif slope_corresponding_textregion==999:
slope_corresponding_textregion=slope_biggest slope_corresponding_textregion=0
slopes_sub.append(slope_corresponding_textregion) slopes_per_each_subprocess.append(slope_corresponding_textregion)
cnt_clean_rot = self.textline_contours_postprocessing(crop_img bounding_rectangle_of_textlines = self.textline_contours_postprocessing(crop_img
, slope_corresponding_textregion, , slope_corresponding_textregion,
contours_per_process[mv], boxes_per_process[mv]) contours_per_process[mv], boxes_per_process[mv])
poly_sub.append(cnt_clean_rot) textlines_rectangles_per_each_subprocess.append(bounding_rectangle_of_textlines)
boxes_sub_new.append(boxes_per_process[mv] ) bounding_box_of_textregion_per_each_subprocess.append(boxes_per_process[mv] )
q.put(slopes_sub) queue_of_slopes_per_textregion.put(slopes_per_each_subprocess)
poly.put(poly_sub) queue_of_textlines_rectangle_per_textregion.put(textlines_rectangles_per_each_subprocess)
box_sub.put(boxes_sub_new ) queue_of_textregion_box.put(bounding_box_of_textregion_per_each_subprocess )
contours_sub.put(contours_sub_per_p) queue_of_quntours_of_textregion.put(contours_textregion_per_each_subprocess)
def get_slopes_and_deskew(self, contours,textline_mask_tot): def get_slopes_and_deskew(self, contours,textline_mask_tot):
slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des)
num_cores = cpu_count() num_cores = cpu_count()
q = Queue()
poly=Queue() queue_of_slopes_per_textregion = Queue()
box_sub=Queue() queue_of_textlines_rectangle_per_textregion=Queue()
contours_sub=Queue() queue_of_textregion_box=Queue()
queue_of_quntours_of_textregion=Queue()
processes = [] processes = []
nh=np.linspace(0, len(self.boxes), num_cores+1) nh=np.linspace(0, len(self.boxes), num_cores+1)
@ -1125,7 +1093,8 @@ class textlineerkenner:
for i in range(num_cores): for i in range(num_cores):
boxes_per_process=self.boxes[int(nh[i]):int(nh[i+1])] boxes_per_process=self.boxes[int(nh[i]):int(nh[i+1])]
contours_per_process=contours[int(nh[i]):int(nh[i+1])] contours_per_process=contours[int(nh[i]):int(nh[i+1])]
processes.append(Process(target=self.do_work_of_slopes, args=(q,poly,box_sub, boxes_per_process, contours_sub, textline_mask_tot, contours_per_process))) processes.append(Process(target=self.do_work_of_slopes, args=(queue_of_slopes_per_textregion,queue_of_textlines_rectangle_per_textregion,
queue_of_textregion_box, boxes_per_process, queue_of_quntours_of_textregion, textline_mask_tot, contours_per_process)))
for i in range(num_cores): for i in range(num_cores):
processes[i].start() processes[i].start()
@ -1136,10 +1105,10 @@ class textlineerkenner:
self.boxes=[] self.boxes=[]
for i in range(num_cores): for i in range(num_cores):
slopes_for_sub_process=q.get(True) slopes_for_sub_process=queue_of_slopes_per_textregion.get(True)
boxes_for_sub_process=box_sub.get(True) boxes_for_sub_process=queue_of_textregion_box.get(True)
polys_for_sub_process=poly.get(True) polys_for_sub_process=queue_of_textlines_rectangle_per_textregion.get(True)
contours_for_subprocess=contours_sub.get(True) contours_for_subprocess=queue_of_quntours_of_textregion.get(True)
for j in range(len(slopes_for_sub_process)): for j in range(len(slopes_for_sub_process)):
self.slopes.append(slopes_for_sub_process[j]) self.slopes.append(slopes_for_sub_process[j])
@ -1154,11 +1123,11 @@ class textlineerkenner:
def order_of_regions(self, textline_mask,contours_main): def order_of_regions(self, textline_mask,contours_main):
mada_n=textline_mask.sum(axis=1) textline_sum_along_width=textline_mask.sum(axis=1)
y=mada_n[:]
y_help=np.zeros(len(y)+40) y=textline_sum_along_width[:]
y_help[20:len(y)+20]=y y_padded=np.zeros(len(y)+40)
y_padded[20:len(y)+20]=y
x=np.array( range(len(y)) ) x=np.array( range(len(y)) )
@ -1167,8 +1136,8 @@ class textlineerkenner:
sigma_gaus=8 sigma_gaus=8
z= gaussian_filter1d(y_help, sigma_gaus) z= gaussian_filter1d(y_padded, sigma_gaus)
zneg_rev=-y_help+np.max(y_help) zneg_rev=-y_padded+np.max(y_padded)
zneg=np.zeros(len(zneg_rev)+40) zneg=np.zeros(len(zneg_rev)+40)
zneg[20:len(zneg_rev)+20]=zneg_rev zneg[20:len(zneg_rev)+20]=zneg_rev
@ -1424,6 +1393,12 @@ class textlineerkenner:
# extract text regions and corresponding contours and surrounding box # extract text regions and corresponding contours and surrounding box
text_regions=self.extract_text_regions(image_page) text_regions=self.extract_text_regions(image_page)
text_regions = cv2.erode(text_regions, self.kernel, iterations=3)
text_regions = cv2.dilate(text_regions, self.kernel, iterations=4)
#plt.imshow(text_regions[:,:,0])
#plt.show()
contours=self.get_text_region_contours_and_boxes(text_regions) contours=self.get_text_region_contours_and_boxes(text_regions)
@ -1441,9 +1416,6 @@ class textlineerkenner:
# extracting textlines using segmentation # extracting textlines using segmentation
textline_mask_tot=self.textline_contours(image_page) textline_mask_tot=self.textline_contours(image_page)
#print(textline_mask_tot)
#plt.imshow(textline_mask_tot)
#plt.show()
########## ##########
K.clear_session() K.clear_session()
gc.collect() gc.collect()
@ -1495,7 +1467,6 @@ class textlineerkenner:
@click.command() @click.command()
@click.option('--image', '-i', help='image filename', type=click.Path(exists=True, dir_okay=False)) @click.option('--image', '-i', help='image filename', type=click.Path(exists=True, dir_okay=False))
@click.option('--out', '-o', help='directory to write output xml data', type=click.Path(exists=True, file_okay=False)) @click.option('--out', '-o', help='directory to write output xml data', type=click.Path(exists=True, file_okay=False))
@ -1509,3 +1480,4 @@ def main(image, out, model):
if __name__ == "__main__": if __name__ == "__main__":
main() main()

Loading…
Cancel
Save