Update main.py - robust deskewing and better page extraction

2026-07-13 06:49:11 +02:00 · 2019-11-28 16:19:44 +01:00 · 2019-11-28 16:19:44 +01:00 · a11f6740cb
commit a11f6740cb
parent 0182b7087f
1 changed file with 201 additions and 229 deletions
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@ -51,7 +51,7 @@ class textlineerkenner:
                self.f_name = self.f_name.split('.')[0]
        self.dir_models = dir_models
        self.kernel = np.ones((5, 5), np.uint8)
-        self.model_page_dir = dir_models + '/model_page.h5'
+        self.model_page_dir = dir_models + '/model_page_new.h5'
        self.model_region_dir = dir_models + '/model_strukturerkennung.h5'
        self.model_textline_dir = dir_models + '/model_textline.h5'
@ -199,20 +199,32 @@ class textlineerkenner:
            self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
        elif self.image.shape[0] < 2000 and self.image.shape[0] >= 1000:
-            self.img_hight_int = 3500
+            self.img_hight_int = int(self.image.shape[0]*1.1)
            self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
-        elif self.image.shape[0] < 3000 and self.image.shape[0] >= 2000:
+        elif self.image.shape[0] < 3300 and self.image.shape[0] >= 2000:
-            self.img_hight_int = 5500
+            self.img_hight_int = int(self.image.shape[0]*1.1)
            self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
-        elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3000:
+        elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3300 and self.image.shape[1]<2400 :
            self.img_hight_int = int(self.image.shape[0]*1.1)# 6500
            self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
        elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3300 and self.image.shape[1]>=2400 :
            self.img_hight_int = 6500
            self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
        elif self.image.shape[0] < 5400 and self.image.shape[0] > 4000 and self.image.shape[1]>3300 :
            self.img_hight_int = int(self.image.shape[0]*1.6)# 6500
            self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
        elif self.image.shape[0] < 11000 and self.image.shape[0] >= 7000 :
            self.img_hight_int = int(self.image.shape[0]*1.6)# 6500
            self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
        else:
-            self.img_hight_int = self.image.shape[0]
+            self.img_hight_int = int(self.image.shape[0]*1.1)# 6500
-            self.img_width_int = self.image.shape[1]
+            self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0]))
            #self.img_hight_int = self.image.shape[0]
            #self.img_width_int = self.image.shape[1]
        self.scale_y = self.img_hight_int / float(self.image.shape[0])
        self.scale_x = self.img_width_int / float(self.image.shape[1])
@ -391,8 +403,8 @@ class textlineerkenner:
        patches=False
        model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
        img = self.otsu_copy(self.image)
-        for ii in range(1):
+        #for ii in range(1):
-            img = cv2.GaussianBlur(img, (15, 15), 0)
+        #    img = cv2.GaussianBlur(img, (15, 15), 0)
        img_page_prediction=self.do_prediction(patches,img,model_page)
@ -400,7 +412,7 @@ class textlineerkenner:
        imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(imgray, 0, 255, 0)
-        thresh = cv2.dilate(thresh, self.kernel, iterations=3)
+        thresh = cv2.dilate(thresh, self.kernel, iterations=6)
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
@ -409,10 +421,22 @@ class textlineerkenner:
        x, y, w, h = cv2.boundingRect(cnt)
        try:
            box = [x, y, w, h]
            croped_page, page_coord = self.crop_image_inside_box(box, self.image)
            self.cont_page=[]
            self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , 
                                                        [ page_coord[3] , page_coord[0] ] ,
                                                        [ page_coord[3] , page_coord[1] ] ,
                                                    [ page_coord[2] , page_coord[1] ]] ) )
        except:
            box = [0, 0, self.image.shape[1]-1, self.image.shape[0]-1]
            croped_page, page_coord = self.crop_image_inside_box(box, self.image)
            self.cont_page=[]
            self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , 
                                                        [ page_coord[3] , page_coord[0] ] ,
@ -437,8 +461,10 @@ class textlineerkenner:
        img = self.otsu_copy(img)
        img = img.astype(np.uint8)
        prediction_regions=self.do_prediction(patches,img,model_region)
        session_region.close()
        del model_region
        del session_region
@ -505,8 +531,8 @@ class textlineerkenner:
            self.all_text_region_raw.append(crop_img[:, :, 0])
            self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1])
-    def seperate_lines(self, img_path, contour_text_interest, thetha):
+    def seperate_lines(self, img_patch, contour_text_interest, thetha):
-        (h, w) = img_path.shape[:2]
+        (h, w) = img_patch.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
        x_d = M[0, 2]
@ -522,19 +548,19 @@ class textlineerkenner:
        y_cont = y_cont - np.min(y_cont)
        x_min_cont = 0
-        x_max_cont = img_path.shape[1]
+        x_max_cont = img_patch.shape[1]
        y_min_cont = 0
-        y_max_cont = img_path.shape[0]
+        y_max_cont = img_patch.shape[0]
        xv = np.linspace(x_min_cont, x_max_cont, 1000)
-        mada_n = img_path.sum(axis=1)
+        textline_patch_sum_along_width = img_patch.sum(axis=1)
        first_nonzero = 0  # (next((i for i, x in enumerate(mada_n) if x), None))
-        y = mada_n[:]  # [first_nonzero:last_nonzero]
+        y = textline_patch_sum_along_width[:]  # [first_nonzero:last_nonzero]
-        y_help = np.zeros(len(y) + 40)
+        y_padded = np.zeros(len(y) + 40)
-        y_help[20:len(y) + 20] = y
+        y_padded[20:len(y) + 20] = y
        x = np.array(range(len(y)))
        peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
@ -544,14 +570,20 @@ class textlineerkenner:
            sigma_gaus=8
-        z= gaussian_filter1d(y_help, sigma_gaus)
+        y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus)
-        zneg_rev=-y_help+np.max(y_help)
+        y_padded_up_to_down=-y_padded+np.max(y_padded)
-        zneg=np.zeros(len(zneg_rev)+40)
+        y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40)
-        zneg[20:len(zneg_rev)+20]=zneg_rev
+        y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down
-        zneg= gaussian_filter1d(zneg, sigma_gaus)
+        y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
        peaks, _ = find_peaks(y_padded_smoothed, height=0)
        peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
        mean_value_of_peaks=np.mean(y_padded_smoothed[peaks])
        std_value_of_peaks=np.std(y_padded_smoothed[peaks])
        peaks_values=y_padded_smoothed[peaks]
        peaks, _ = find_peaks(z, height=0)
        peaks_neg, _ = find_peaks(zneg, height=0)
        peaks_neg = peaks_neg - 20 - 20
        peaks = peaks - 20
@ -568,21 +600,40 @@ class textlineerkenner:
        textline_boxes_rot = []
        if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3:
            #print('11')
            for jj in range(len(peaks)):
                if jj==(len(peaks)-1):
                    dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
                    dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
                    if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
                        point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
                        point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
                    else:
                        point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
                        point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
                    point_down_narrow = peaks[jj] + first_nonzero + int(
                        1.4 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
                else:
                    dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
                    dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
                    if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.:
                        point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
                        point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
                    else:
                        point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up)  ##+int(dis_to_next_up*1./4.0)
                        point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down)  ###-int(dis_to_next_down*1./4.0)
                    point_down_narrow = peaks[jj] + first_nonzero + int(
                        1.1 * dis_to_next_down)  ###-int(dis_to_next_down*1./2)
                if point_down >= img_path.shape[0]:
                    point_down = img_path.shape[0] - 2
-                if point_down_narrow >= img_path.shape[0]:
+
-                    point_down_narrow = img_path.shape[0] - 2
+                if point_down_narrow >= img_patch.shape[0]:
                    point_down_narrow = img_patch.shape[0] - 2
                distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
                             for mj in range(len(xv))]
@ -672,15 +723,15 @@ class textlineerkenner:
            dis_to_next = np.abs(peaks[1] - peaks[0])
            for jj in range(len(peaks)):
                if jj == 0:
-                    point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
+                    point_up = 0#peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next)
                    if point_up < 0:
                        point_up = 1
-                    point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next)
+                    point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
                elif jj == 1:
-                    point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next)
+                    point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next)
-                    if point_down >= img_path.shape[0]:
+                    if point_down >= img_patch.shape[0]:
-                        point_down = img_path.shape[0] - 2
+                        point_down = img_patch.shape[0] - 2
-                    point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
+                    point_up = peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next)
                distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
                             for mj in range(len(xv))]
@ -692,8 +743,8 @@ class textlineerkenner:
                    x_min = x_min_cont
                    x_max = x_max_cont
                else:
-                    x_min = np.min(xvinside)  # max(x_min_interest,x_min_cont)
+                    x_min = np.min(xvinside)
-                    x_max = np.max(xvinside)  # min(x_max_interest,x_max_cont)
+                    x_max = np.max(xvinside)
                p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)])
                p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)])
@ -737,9 +788,9 @@ class textlineerkenner:
                elif jj == len(peaks) - 1:
                    dis_to_next = peaks[jj] - peaks[jj - 1]
                    # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next)
-                    point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next)
+                    point_down = peaks[jj] + first_nonzero + int(1. / 1.7 * dis_to_next)
-                    if point_down >= img_path.shape[0]:
+                    if point_down >= img_patch.shape[0]:
-                        point_down = img_path.shape[0] - 2
+                        point_down = img_patch.shape[0] - 2
                    # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next)
                    point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
                else:
@ -858,65 +909,7 @@ class textlineerkenner:
        return contours_rotated_clean
    def textline_contours_to_get_slope_correctly(self, textline_mask, img_patch, contour_interest):
        slope_new = 0  # deskew_images(img_patch)
        textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
        textline_mask = textline_mask.astype(np.uint8)
        textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, self.kernel)
        textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, self.kernel)
        textline_mask = cv2.erode(textline_mask, self.kernel, iterations=1)
        imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(imgray, 0, 255, 0)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, self.kernel)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, self.kernel)
        contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003)
        textline_maskt = textline_mask[:, :, 0]
        textline_maskt[textline_maskt != 0] = 1
        peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new)
        mean_dis = np.mean(np.diff(peaks_point))
        len_x = thresh.shape[1]
        slope_lines = []
        contours_slope_new = []
        for kk in range(len(main_contours)):
            xminh = np.min(main_contours[kk][:, 0])
            xmaxh = np.max(main_contours[kk][:, 0])
            yminh = np.min(main_contours[kk][:, 1])
            ymaxh = np.max(main_contours[kk][:, 1])
            if ymaxh - yminh <= mean_dis and (
                    xmaxh - xminh) >= 0.3 * len_x:  # xminh>=0.05*len_x and xminh<=0.4*len_x and xmaxh<=0.95*len_x and xmaxh>=0.6*len_x:
                contours_slope_new.append(main_contours[kk])
                rows, cols = thresh.shape[:2]
                [vx, vy, x, y] = cv2.fitLine(main_contours[kk], cv2.DIST_L2, 0, 0.01, 0.01)
                slope_lines.append((vy / vx) / np.pi * 180)
            if len(slope_lines) >= 2:
                slope = np.mean(slope_lines)  # slope_true/np.pi*180
            else:
                slope = 999
        else:
            slope = 0
        return slope
    def return_contours_of_image(self,image_box_tabels_1):
        image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2)
@ -935,18 +928,18 @@ class textlineerkenner:
    def isNaN(self,num):
        return num != num
-    def find_num_col(self,regions_without_seperators,sigma_,multiplier=3.8 ):
+    def get_standard_deviation_of_summed_textline_patch_along_width(self,img_patch,sigma_,multiplier=3.8 ):
-        regions_without_seperators_0=regions_without_seperators[:,:].sum(axis=1)
+        img_patch_sum_along_width=img_patch[:,:].sum(axis=1)
-        meda_n_updown=regions_without_seperators_0[len(regions_without_seperators_0)::-1]
+        img_patch_sum_along_width_updown=img_patch_sum_along_width[len(img_patch_sum_along_width)::-1]
-        first_nonzero=(next((i for i, x in enumerate(regions_without_seperators_0) if x), 0))
+        first_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width) if x), 0))
-        last_nonzero=(next((i for i, x in enumerate(meda_n_updown) if x), 0))
+        last_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width_updown) if x), 0))
-        last_nonzero=len(regions_without_seperators_0)-last_nonzero
+        last_nonzero=len(img_patch_sum_along_width)-last_nonzero
-        y=regions_without_seperators_0#[first_nonzero:last_nonzero]
+        y=img_patch_sum_along_width#[first_nonzero:last_nonzero]
        y_help=np.zeros(len(y)+20)
@ -972,151 +965,126 @@ class textlineerkenner:
        peaks_neg=peaks_neg-10-10
        last_nonzero=last_nonzero-0#100
        first_nonzero=first_nonzero+0#+100
        peaks_neg=peaks_neg[(peaks_neg>first_nonzero) & (peaks_neg<last_nonzero)]
        peaks=peaks[(peaks>.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])]
        interest_pos=z[peaks]
        interest_pos=interest_pos[interest_pos>10]
        interest_neg=z[peaks_neg]
-        
+        min_peaks_pos=np.mean(interest_pos)
        if interest_neg[0]<0.1:
            interest_neg=interest_neg[1:]
        if interest_neg[len(interest_neg)-1]<0.1:
            interest_neg=interest_neg[:len(interest_neg)-1]
        min_peaks_pos=np.min(interest_pos)
        min_peaks_neg=0#np.min(interest_neg)
        dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier
        #print(interest_pos)
        grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
-        interest_neg_fin=interest_neg#[(interest_neg<grenze)]
+        interest_neg_fin=interest_neg[(interest_neg<grenze)]
-        peaks_neg_fin=peaks_neg#[(interest_neg<grenze)]
+        peaks_neg_fin=peaks_neg[(interest_neg<grenze)]
-        interest_neg_fin=interest_neg#[(interest_neg<grenze)]
+        interest_neg_fin=interest_neg[(interest_neg<grenze)]
-        num_col=(len(interest_neg_fin))+1
+        return interest_neg_fin,np.std(z)
    def return_deskew_slope(self,img_patch,sigma_des):
        img_patch_copy=np.zeros((img_patch.shape[0],img_patch.shape[1]))
        img_patch_copy[:,:]=img_patch[:,:]#img_patch_org[:,:,0]
-        p_l=0
+        img_patch_padded=np.zeros((int( img_patch_copy.shape[0]*(1.2) ) , int( img_patch_copy.shape[1]*(2.6) ) ))
        p_u=len(y)-1
        p_m=int(len(y)/2.)
        p_g_l=int(len(y)/3.)
        p_g_u=len(y)-int(len(y)/3.)
-        
+        img_patch_padded[ int( img_patch_copy.shape[0]*(.1)):int( img_patch_copy.shape[0]*(.1))+img_patch_copy.shape[0] , int( img_patch_copy.shape[1]*(.8)):int( img_patch_copy.shape[1]*(.8))+img_patch_copy.shape[1] ]=img_patch_copy[:,:]
-        diff_peaks=np.abs( np.diff(peaks_neg_fin) )
+        angles=np.linspace(-12,12,40)
        diff_peaks_annormal=diff_peaks[diff_peaks<30]
        return interest_neg_fin
    def return_deskew_slop(self,img_patch_org,sigma_des):
        img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
        img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
        img_resized=np.zeros((int( img_int.shape[0]*(1.2) ) , int( img_int.shape[1]*(1.2) ) ))
        img_resized[ int( img_int.shape[0]*(.1)):int( img_int.shape[0]*(.1))+img_int.shape[0] , int( img_int.shape[1]*(.1)):int( img_int.shape[1]*(.1))+img_int.shape[1] ]=img_int[:,:]
        angels=np.linspace(-4,4,60)
        res=[]
        num_of_peaks=[]
        index_cor=[]
        var_res=[]
        indexer=0
-        for rot in angels:
+        for rot in angles:
-            img_rot=self.rotate_image(img_resized,rot)
+            img_rotated=self.rotate_image(img_patch_padded,rot)
-            img_rot[img_rot!=0]=1
+            img_rotated[img_rotated!=0]=1
-            res_me=np.mean(self.find_num_col(img_rot,sigma_des,200.3  ))
+            try:
                neg_peaks,var_spectrum=self.get_standard_deviation_of_summed_textline_patch_along_width(img_rotated,sigma_des,20.3  )
                res_me=np.mean(neg_peaks)
                if res_me==0:
                    res_me=1000000000000000000000
                else:
                    pass
                res_num=len(neg_peaks)
            except:
                res_me=1000000000000000000000
                res_num=0
                var_spectrum=0
            if self.isNaN(res_me):
                pass
            else:
                res.append( res_me )
                var_res.append(var_spectrum)
                num_of_peaks.append( res_num )
                index_cor.append(indexer)
            indexer=indexer+1
-        res=np.array(res)
+        try:
-        arg_int=np.argmin(res)
+            var_res=np.array(var_res)
        arg_fin=index_cor[arg_int]
        ang_int=angels[arg_fin]
-        img_rot=self.rotate_image(img_resized,ang_int)
+            ang_int=angles[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
-        img_rot[img_rot!=0]=1
+        except:
            ang_int=0
        return ang_int
-    def do_work_of_slopes(self,q,poly,box_sub,boxes_per_process,contours_sub,textline_mask_tot,contours_per_process):
+    def do_work_of_slopes(self,queue_of_slopes_per_textregion,queue_of_textlines_rectangle_per_textregion
-        slope_biggest=0
+                          ,queue_of_textregion_box,boxes_per_process,queue_of_quntours_of_textregion,textline_mask_tot,contours_per_process):
-        slopes_sub = []
+        
-        boxes_sub_new=[]
+        slopes_per_each_subprocess = []
-        poly_sub=[]
+        bounding_box_of_textregion_per_each_subprocess=[]
-        contours_sub_per_p=[]
+        textlines_rectangles_per_each_subprocess=[]
        contours_textregion_per_each_subprocess=[]
        for mv in range(len(boxes_per_process)):
-            contours_sub_per_p.append(contours_per_process[mv])
+            contours_textregion_per_each_subprocess.append(contours_per_process[mv])
            crop_img, _ = self.crop_image_inside_box(boxes_per_process[mv],
                                                                        np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
            crop_img=crop_img[:,:,0]
            crop_img=cv2.erode(crop_img,self.kernel,iterations = 2)
            try:
-                textline_con,hierachy=self.return_contours_of_image(crop_img)
+                sigma_des=2
-                textline_con_fil=self.filter_contours_area_of_image(crop_img,textline_con,hierachy,max_area=1,min_area=0.0008)
+                slope_corresponding_textregion=self.return_deskew_slope(crop_img,sigma_des)
                y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil)
                sigma_des=int(  y_diff_mean * (4./40.0) )
                if sigma_des<1:
                    sigma_des=1
                crop_img[crop_img>0]=1
                slope_corresponding_textregion=self.return_deskew_slop(crop_img,sigma_des)
            except:
                slope_corresponding_textregion=999
            if np.abs(slope_corresponding_textregion)>12.5 and slope_corresponding_textregion!=999:
-                slope_corresponding_textregion=slope_biggest
+                slope_corresponding_textregion=0
            elif slope_corresponding_textregion==999:
-                slope_corresponding_textregion=slope_biggest
+                slope_corresponding_textregion=0
-            slopes_sub.append(slope_corresponding_textregion)
+            slopes_per_each_subprocess.append(slope_corresponding_textregion)
-            cnt_clean_rot = self.textline_contours_postprocessing(crop_img
+            bounding_rectangle_of_textlines = self.textline_contours_postprocessing(crop_img
                                                                                        , slope_corresponding_textregion,
                                                                                        contours_per_process[mv], boxes_per_process[mv])
-            poly_sub.append(cnt_clean_rot)
+            textlines_rectangles_per_each_subprocess.append(bounding_rectangle_of_textlines)
-            boxes_sub_new.append(boxes_per_process[mv] )
+            bounding_box_of_textregion_per_each_subprocess.append(boxes_per_process[mv] )
-        q.put(slopes_sub)
+        queue_of_slopes_per_textregion.put(slopes_per_each_subprocess)
-        poly.put(poly_sub)
+        queue_of_textlines_rectangle_per_textregion.put(textlines_rectangles_per_each_subprocess)
-        box_sub.put(boxes_sub_new )
+        queue_of_textregion_box.put(bounding_box_of_textregion_per_each_subprocess )
-        contours_sub.put(contours_sub_per_p)
+        queue_of_quntours_of_textregion.put(contours_textregion_per_each_subprocess)
    def get_slopes_and_deskew(self, contours,textline_mask_tot):
        slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des)
        num_cores = cpu_count()
-        q = Queue()
+        
-        poly=Queue()
+        queue_of_slopes_per_textregion = Queue()
-        box_sub=Queue()
+        queue_of_textlines_rectangle_per_textregion=Queue()
-        contours_sub=Queue()
+        queue_of_textregion_box=Queue()
        queue_of_quntours_of_textregion=Queue()
        processes = []
        nh=np.linspace(0, len(self.boxes), num_cores+1)
@ -1125,7 +1093,8 @@ class textlineerkenner:
        for i in range(num_cores):
            boxes_per_process=self.boxes[int(nh[i]):int(nh[i+1])]
            contours_per_process=contours[int(nh[i]):int(nh[i+1])]
-            processes.append(Process(target=self.do_work_of_slopes, args=(q,poly,box_sub,  boxes_per_process, contours_sub, textline_mask_tot, contours_per_process)))
+            processes.append(Process(target=self.do_work_of_slopes, args=(queue_of_slopes_per_textregion,queue_of_textlines_rectangle_per_textregion,
                                                                          queue_of_textregion_box,  boxes_per_process, queue_of_quntours_of_textregion, textline_mask_tot, contours_per_process)))
        for i in range(num_cores):
            processes[i].start()
@ -1136,10 +1105,10 @@ class textlineerkenner:
        self.boxes=[]
        for i in range(num_cores):
-            slopes_for_sub_process=q.get(True)
+            slopes_for_sub_process=queue_of_slopes_per_textregion.get(True)
-            boxes_for_sub_process=box_sub.get(True)
+            boxes_for_sub_process=queue_of_textregion_box.get(True)
-            polys_for_sub_process=poly.get(True)
+            polys_for_sub_process=queue_of_textlines_rectangle_per_textregion.get(True)
-            contours_for_subprocess=contours_sub.get(True)
+            contours_for_subprocess=queue_of_quntours_of_textregion.get(True)
            for j in range(len(slopes_for_sub_process)):
                self.slopes.append(slopes_for_sub_process[j])
@ -1154,11 +1123,11 @@ class textlineerkenner:
    def order_of_regions(self, textline_mask,contours_main):
-        mada_n=textline_mask.sum(axis=1)
+        textline_sum_along_width=textline_mask.sum(axis=1)
        y=mada_n[:]
-        y_help=np.zeros(len(y)+40)
+        y=textline_sum_along_width[:]
-        y_help[20:len(y)+20]=y
+        y_padded=np.zeros(len(y)+40)
        y_padded[20:len(y)+20]=y
        x=np.array( range(len(y)) )
@ -1167,8 +1136,8 @@ class textlineerkenner:
        sigma_gaus=8
-        z= gaussian_filter1d(y_help, sigma_gaus)
+        z= gaussian_filter1d(y_padded, sigma_gaus)
-        zneg_rev=-y_help+np.max(y_help)
+        zneg_rev=-y_padded+np.max(y_padded)
        zneg=np.zeros(len(zneg_rev)+40)
        zneg[20:len(zneg_rev)+20]=zneg_rev
@ -1424,6 +1393,12 @@ class textlineerkenner:
        # extract text regions and corresponding contours and surrounding box
        text_regions=self.extract_text_regions(image_page)
        text_regions = cv2.erode(text_regions, self.kernel, iterations=3)
        text_regions = cv2.dilate(text_regions, self.kernel, iterations=4)
        #plt.imshow(text_regions[:,:,0])
        #plt.show()
        contours=self.get_text_region_contours_and_boxes(text_regions)
@ -1441,9 +1416,6 @@ class textlineerkenner:
            # extracting textlines using segmentation
            textline_mask_tot=self.textline_contours(image_page)
            #print(textline_mask_tot)
            #plt.imshow(textline_mask_tot)
            #plt.show()
            ##########  
            K.clear_session()
            gc.collect()
@ -1495,7 +1467,6 @@ class textlineerkenner:
@click.command()
@click.option('--image', '-i', help='image filename', type=click.Path(exists=True, dir_okay=False))
@click.option('--out', '-o', help='directory to write output xml data', type=click.Path(exists=True, file_okay=False))
@ -1509,3 +1480,4 @@ def main(image, out, model):
 if __name__ == "__main__":
    main()