From a11f6740cb30062c9fb19dc72df768cc5552e73a Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 28 Nov 2019 16:19:44 +0100 Subject: [PATCH] Update main.py - robust deskewing and better page extraction --- qurator/sbb_textline_detector/main.py | 428 ++++++++++++-------------- 1 file changed, 200 insertions(+), 228 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 44399f1..1f78aaf 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -51,7 +51,7 @@ class textlineerkenner: self.f_name = self.f_name.split('.')[0] self.dir_models = dir_models self.kernel = np.ones((5, 5), np.uint8) - self.model_page_dir = dir_models + '/model_page.h5' + self.model_page_dir = dir_models + '/model_page_new.h5' self.model_region_dir = dir_models + '/model_strukturerkennung.h5' self.model_textline_dir = dir_models + '/model_textline.h5' @@ -199,20 +199,32 @@ class textlineerkenner: self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) elif self.image.shape[0] < 2000 and self.image.shape[0] >= 1000: - self.img_hight_int = 3500 + self.img_hight_int = int(self.image.shape[0]*1.1) self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) - elif self.image.shape[0] < 3000 and self.image.shape[0] >= 2000: - self.img_hight_int = 5500 + elif self.image.shape[0] < 3300 and self.image.shape[0] >= 2000: + self.img_hight_int = int(self.image.shape[0]*1.1) self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) - elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3000: + elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3300 and self.image.shape[1]<2400 : + self.img_hight_int = int(self.image.shape[0]*1.1)# 6500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3300 and self.image.shape[1]>=2400 : self.img_hight_int = 6500 self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) - + + elif self.image.shape[0] < 5400 and self.image.shape[0] > 4000 and self.image.shape[1]>3300 : + self.img_hight_int = int(self.image.shape[0]*1.6)# 6500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + elif self.image.shape[0] < 11000 and self.image.shape[0] >= 7000 : + self.img_hight_int = int(self.image.shape[0]*1.6)# 6500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) else: - self.img_hight_int = self.image.shape[0] - self.img_width_int = self.image.shape[1] + self.img_hight_int = int(self.image.shape[0]*1.1)# 6500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + #self.img_hight_int = self.image.shape[0] + #self.img_width_int = self.image.shape[1] self.scale_y = self.img_hight_int / float(self.image.shape[0]) self.scale_x = self.img_width_int / float(self.image.shape[1]) @@ -391,8 +403,8 @@ class textlineerkenner: patches=False model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = self.otsu_copy(self.image) - for ii in range(1): - img = cv2.GaussianBlur(img, (15, 15), 0) + #for ii in range(1): + # img = cv2.GaussianBlur(img, (15, 15), 0) img_page_prediction=self.do_prediction(patches,img,model_page) @@ -400,7 +412,7 @@ class textlineerkenner: imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) - thresh = cv2.dilate(thresh, self.kernel, iterations=3) + thresh = cv2.dilate(thresh, self.kernel, iterations=6) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) @@ -408,16 +420,28 @@ class textlineerkenner: cnt = contours[np.argmax(cnt_size)] x, y, w, h = cv2.boundingRect(cnt) + + try: + box = [x, y, w, h] + + croped_page, page_coord = self.crop_image_inside_box(box, self.image) + - box = [x, y, w, h] + self.cont_page=[] + self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , + [ page_coord[3] , page_coord[0] ] , + [ page_coord[3] , page_coord[1] ] , + [ page_coord[2] , page_coord[1] ]] ) ) + except: + box = [0, 0, self.image.shape[1]-1, self.image.shape[0]-1] + croped_page, page_coord = self.crop_image_inside_box(box, self.image) + - croped_page, page_coord = self.crop_image_inside_box(box, self.image) - - self.cont_page=[] - self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , - [ page_coord[3] , page_coord[0] ] , - [ page_coord[3] , page_coord[1] ] , - [ page_coord[2] , page_coord[1] ]] ) ) + self.cont_page=[] + self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , + [ page_coord[3] , page_coord[0] ] , + [ page_coord[3] , page_coord[1] ] , + [ page_coord[2] , page_coord[1] ]] ) ) session_page.close() del model_page @@ -437,8 +461,10 @@ class textlineerkenner: img = self.otsu_copy(img) img = img.astype(np.uint8) + prediction_regions=self.do_prediction(patches,img,model_region) + session_region.close() del model_region del session_region @@ -505,8 +531,8 @@ class textlineerkenner: self.all_text_region_raw.append(crop_img[:, :, 0]) self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1]) - def seperate_lines(self, img_path, contour_text_interest, thetha): - (h, w) = img_path.shape[:2] + def seperate_lines(self, img_patch, contour_text_interest, thetha): + (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) x_d = M[0, 2] @@ -522,19 +548,19 @@ class textlineerkenner: y_cont = y_cont - np.min(y_cont) x_min_cont = 0 - x_max_cont = img_path.shape[1] + x_max_cont = img_patch.shape[1] y_min_cont = 0 - y_max_cont = img_path.shape[0] + y_max_cont = img_patch.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - mada_n = img_path.sum(axis=1) + textline_patch_sum_along_width = img_patch.sum(axis=1) first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) - y = mada_n[:] # [first_nonzero:last_nonzero] - y_help = np.zeros(len(y) + 40) - y_help[20:len(y) + 20] = y + y = textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] + y_padded = np.zeros(len(y) + 40) + y_padded[20:len(y) + 20] = y x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) @@ -544,14 +570,20 @@ class textlineerkenner: sigma_gaus=8 - z= gaussian_filter1d(y_help, sigma_gaus) - zneg_rev=-y_help+np.max(y_help) - zneg=np.zeros(len(zneg_rev)+40) - zneg[20:len(zneg_rev)+20]=zneg_rev - zneg= gaussian_filter1d(zneg, sigma_gaus) + y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus) + y_padded_up_to_down=-y_padded+np.max(y_padded) + y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40) + y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down + y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) + - peaks, _ = find_peaks(z, height=0) - peaks_neg, _ = find_peaks(zneg, height=0) + peaks, _ = find_peaks(y_padded_smoothed, height=0) + peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0) + + mean_value_of_peaks=np.mean(y_padded_smoothed[peaks]) + std_value_of_peaks=np.std(y_padded_smoothed[peaks]) + peaks_values=y_padded_smoothed[peaks] + peaks_neg = peaks_neg - 20 - 20 peaks = peaks - 20 @@ -568,21 +600,40 @@ class textlineerkenner: textline_boxes_rot = [] if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3: + #print('11') for jj in range(len(peaks)): - dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) - dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) + + if jj==(len(peaks)-1): + dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) + dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) + + if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: + point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + else: + point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) - point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down_narrow = peaks[jj] + first_nonzero + int( + 1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + else: + dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) + dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) + + if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + else: + point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int( - 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int( + 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) - if point_down >= img_path.shape[0]: - point_down = img_path.shape[0] - 2 - if point_down_narrow >= img_path.shape[0]: - point_down_narrow = img_path.shape[0] - 2 + + if point_down_narrow >= img_patch.shape[0]: + point_down_narrow = img_patch.shape[0] - 2 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) for mj in range(len(xv))] @@ -672,15 +723,15 @@ class textlineerkenner: dis_to_next = np.abs(peaks[1] - peaks[0]) for jj in range(len(peaks)): if jj == 0: - point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + point_up = 0#peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next) if point_up < 0: point_up = 1 - point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next) elif jj == 1: - point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) - if point_down >= img_path.shape[0]: - point_down = img_path.shape[0] - 2 - point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next) + if point_down >= img_patch.shape[0]: + point_down = img_patch.shape[0] - 2 + point_up = peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) for mj in range(len(xv))] @@ -692,8 +743,8 @@ class textlineerkenner: x_min = x_min_cont x_max = x_max_cont else: - x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) - x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) + x_min = np.min(xvinside) + x_max = np.max(xvinside) p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) @@ -737,9 +788,9 @@ class textlineerkenner: elif jj == len(peaks) - 1: dis_to_next = peaks[jj] - peaks[jj - 1] # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) - point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) - if point_down >= img_path.shape[0]: - point_down = img_path.shape[0] - 2 + point_down = peaks[jj] + first_nonzero + int(1. / 1.7 * dis_to_next) + if point_down >= img_patch.shape[0]: + point_down = img_patch.shape[0] - 2 # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) else: @@ -858,65 +909,7 @@ class textlineerkenner: return contours_rotated_clean - def textline_contours_to_get_slope_correctly(self, textline_mask, img_patch, contour_interest): - - slope_new = 0 # deskew_images(img_patch) - - textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 - - textline_mask = textline_mask.astype(np.uint8) - textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, self.kernel) - textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, self.kernel) - textline_mask = cv2.erode(textline_mask, self.kernel, iterations=1) - imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - - thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, self.kernel) - thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, self.kernel) - - contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003) - - textline_maskt = textline_mask[:, :, 0] - textline_maskt[textline_maskt != 0] = 1 - - peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new) - - mean_dis = np.mean(np.diff(peaks_point)) - - len_x = thresh.shape[1] - - slope_lines = [] - contours_slope_new = [] - for kk in range(len(main_contours)): - - xminh = np.min(main_contours[kk][:, 0]) - xmaxh = np.max(main_contours[kk][:, 0]) - yminh = np.min(main_contours[kk][:, 1]) - ymaxh = np.max(main_contours[kk][:, 1]) - - - if ymaxh - yminh <= mean_dis and ( - xmaxh - xminh) >= 0.3 * len_x: # xminh>=0.05*len_x and xminh<=0.4*len_x and xmaxh<=0.95*len_x and xmaxh>=0.6*len_x: - contours_slope_new.append(main_contours[kk]) - - rows, cols = thresh.shape[:2] - [vx, vy, x, y] = cv2.fitLine(main_contours[kk], cv2.DIST_L2, 0, 0.01, 0.01) - - slope_lines.append((vy / vx) / np.pi * 180) - - if len(slope_lines) >= 2: - - slope = np.mean(slope_lines) # slope_true/np.pi*180 - else: - slope = 999 - - else: - slope = 0 - - return slope def return_contours_of_image(self,image_box_tabels_1): image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) @@ -935,18 +928,18 @@ class textlineerkenner: def isNaN(self,num): return num != num - def find_num_col(self,regions_without_seperators,sigma_,multiplier=3.8 ): - regions_without_seperators_0=regions_without_seperators[:,:].sum(axis=1) + def get_standard_deviation_of_summed_textline_patch_along_width(self,img_patch,sigma_,multiplier=3.8 ): + img_patch_sum_along_width=img_patch[:,:].sum(axis=1) - meda_n_updown=regions_without_seperators_0[len(regions_without_seperators_0)::-1] + img_patch_sum_along_width_updown=img_patch_sum_along_width[len(img_patch_sum_along_width)::-1] - first_nonzero=(next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)) - last_nonzero=(next((i for i, x in enumerate(meda_n_updown) if x), 0)) + first_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width) if x), 0)) + last_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width_updown) if x), 0)) - last_nonzero=len(regions_without_seperators_0)-last_nonzero + last_nonzero=len(img_patch_sum_along_width)-last_nonzero - y=regions_without_seperators_0#[first_nonzero:last_nonzero] + y=img_patch_sum_along_width#[first_nonzero:last_nonzero] y_help=np.zeros(len(y)+20) @@ -971,152 +964,127 @@ class textlineerkenner: peaks, _ = find_peaks(z, height=0) peaks_neg=peaks_neg-10-10 - - - - last_nonzero=last_nonzero-0#100 - first_nonzero=first_nonzero+0#+100 - - peaks_neg=peaks_neg[(peaks_neg>first_nonzero) & (peaks_neg.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])] - interest_pos=z[peaks] interest_pos=interest_pos[interest_pos>10] - - interest_neg=z[peaks_neg] - - - if interest_neg[0]<0.1: - interest_neg=interest_neg[1:] - if interest_neg[len(interest_neg)-1]<0.1: - interest_neg=interest_neg[:len(interest_neg)-1] - + interest_neg=z[peaks_neg] - min_peaks_pos=np.min(interest_pos) + min_peaks_pos=np.mean(interest_pos) min_peaks_neg=0#np.min(interest_neg) - dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier + #print(interest_pos) grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 - interest_neg_fin=interest_neg#[(interest_neg0]=1 - slope_corresponding_textregion=self.return_deskew_slop(crop_img,sigma_des) - - + sigma_des=2 + slope_corresponding_textregion=self.return_deskew_slope(crop_img,sigma_des) except: slope_corresponding_textregion=999 if np.abs(slope_corresponding_textregion)>12.5 and slope_corresponding_textregion!=999: - slope_corresponding_textregion=slope_biggest + slope_corresponding_textregion=0 elif slope_corresponding_textregion==999: - slope_corresponding_textregion=slope_biggest - slopes_sub.append(slope_corresponding_textregion) + slope_corresponding_textregion=0 + slopes_per_each_subprocess.append(slope_corresponding_textregion) - cnt_clean_rot = self.textline_contours_postprocessing(crop_img + bounding_rectangle_of_textlines = self.textline_contours_postprocessing(crop_img , slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) - poly_sub.append(cnt_clean_rot) - boxes_sub_new.append(boxes_per_process[mv] ) + textlines_rectangles_per_each_subprocess.append(bounding_rectangle_of_textlines) + bounding_box_of_textregion_per_each_subprocess.append(boxes_per_process[mv] ) - q.put(slopes_sub) - poly.put(poly_sub) - box_sub.put(boxes_sub_new ) - contours_sub.put(contours_sub_per_p) + queue_of_slopes_per_textregion.put(slopes_per_each_subprocess) + queue_of_textlines_rectangle_per_textregion.put(textlines_rectangles_per_each_subprocess) + queue_of_textregion_box.put(bounding_box_of_textregion_per_each_subprocess ) + queue_of_quntours_of_textregion.put(contours_textregion_per_each_subprocess) def get_slopes_and_deskew(self, contours,textline_mask_tot): - - slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des) - num_cores = cpu_count() - q = Queue() - poly=Queue() - box_sub=Queue() - contours_sub=Queue() + + queue_of_slopes_per_textregion = Queue() + queue_of_textlines_rectangle_per_textregion=Queue() + queue_of_textregion_box=Queue() + queue_of_quntours_of_textregion=Queue() processes = [] nh=np.linspace(0, len(self.boxes), num_cores+1) @@ -1125,7 +1093,8 @@ class textlineerkenner: for i in range(num_cores): boxes_per_process=self.boxes[int(nh[i]):int(nh[i+1])] contours_per_process=contours[int(nh[i]):int(nh[i+1])] - processes.append(Process(target=self.do_work_of_slopes, args=(q,poly,box_sub, boxes_per_process, contours_sub, textline_mask_tot, contours_per_process))) + processes.append(Process(target=self.do_work_of_slopes, args=(queue_of_slopes_per_textregion,queue_of_textlines_rectangle_per_textregion, + queue_of_textregion_box, boxes_per_process, queue_of_quntours_of_textregion, textline_mask_tot, contours_per_process))) for i in range(num_cores): processes[i].start() @@ -1136,10 +1105,10 @@ class textlineerkenner: self.boxes=[] for i in range(num_cores): - slopes_for_sub_process=q.get(True) - boxes_for_sub_process=box_sub.get(True) - polys_for_sub_process=poly.get(True) - contours_for_subprocess=contours_sub.get(True) + slopes_for_sub_process=queue_of_slopes_per_textregion.get(True) + boxes_for_sub_process=queue_of_textregion_box.get(True) + polys_for_sub_process=queue_of_textlines_rectangle_per_textregion.get(True) + contours_for_subprocess=queue_of_quntours_of_textregion.get(True) for j in range(len(slopes_for_sub_process)): self.slopes.append(slopes_for_sub_process[j]) @@ -1154,11 +1123,11 @@ class textlineerkenner: def order_of_regions(self, textline_mask,contours_main): - mada_n=textline_mask.sum(axis=1) - y=mada_n[:] - - y_help=np.zeros(len(y)+40) - y_help[20:len(y)+20]=y + textline_sum_along_width=textline_mask.sum(axis=1) + + y=textline_sum_along_width[:] + y_padded=np.zeros(len(y)+40) + y_padded[20:len(y)+20]=y x=np.array( range(len(y)) ) @@ -1167,8 +1136,8 @@ class textlineerkenner: sigma_gaus=8 - z= gaussian_filter1d(y_help, sigma_gaus) - zneg_rev=-y_help+np.max(y_help) + z= gaussian_filter1d(y_padded, sigma_gaus) + zneg_rev=-y_padded+np.max(y_padded) zneg=np.zeros(len(zneg_rev)+40) zneg[20:len(zneg_rev)+20]=zneg_rev @@ -1423,6 +1392,12 @@ class textlineerkenner: # extract text regions and corresponding contours and surrounding box text_regions=self.extract_text_regions(image_page) + + text_regions = cv2.erode(text_regions, self.kernel, iterations=3) + text_regions = cv2.dilate(text_regions, self.kernel, iterations=4) + + #plt.imshow(text_regions[:,:,0]) + #plt.show() contours=self.get_text_region_contours_and_boxes(text_regions) @@ -1441,9 +1416,6 @@ class textlineerkenner: # extracting textlines using segmentation textline_mask_tot=self.textline_contours(image_page) - #print(textline_mask_tot) - #plt.imshow(textline_mask_tot) - #plt.show() ########## K.clear_session() gc.collect() @@ -1493,7 +1465,6 @@ class textlineerkenner: print( "time needed to get order of regions = "+"{0:.2f}".format(t6-t5) ) print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) - @click.command() @@ -1509,3 +1480,4 @@ def main(image, out, model): if __name__ == "__main__": main() +