diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 965754a..0256128 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -513,31 +513,31 @@ class Eynollah: img_w_new = 2000 elif num_col == 1 and width_early >= 2500: img_w_new = 2000 - elif num_col == 1 and width_early >= 1100 and width_early < 2500: + elif num_col == 1 and 1100 <= width_early < 2500: img_w_new = width_early elif num_col == 2 and width_early < 2000: img_w_new = 2400 elif num_col == 2 and width_early >= 3500: img_w_new = 2400 - elif num_col == 2 and width_early >= 2000 and width_early < 3500: + elif num_col == 2 and 2000 <= width_early < 3500: img_w_new = width_early elif num_col == 3 and width_early < 2000: img_w_new = 3000 elif num_col == 3 and width_early >= 4000: img_w_new = 3000 - elif num_col == 3 and width_early >= 2000 and width_early < 4000: + elif num_col == 3 and 2000 <= width_early < 4000: img_w_new = width_early elif num_col == 4 and width_early < 2500: img_w_new = 4000 elif num_col == 4 and width_early >= 5000: img_w_new = 4000 - elif num_col == 4 and width_early >= 2500 and width_early < 5000: + elif num_col == 4 and 2500 <= width_early < 5000: img_w_new = width_early elif num_col == 5 and width_early < 3700: img_w_new = 5000 elif num_col == 5 and width_early >= 7000: img_w_new = 5000 - elif num_col == 5 and width_early >= 3700 and width_early < 7000: + elif num_col == 5 and 3700 <= width_early < 7000: img_w_new = width_early elif num_col == 6 and width_early < 4500: img_w_new = 6500 # 5400 @@ -2166,10 +2166,8 @@ class Eynollah: for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80 >= boxes[jj][0] and - x_min_text_only[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main[ii] >= boxes[jj][2] and - y_cor_x_min_main[ii] < boxes[jj][3]): + if (boxes[jj][0] <= x_min_text_only[ii] + 80 < boxes[jj][1] and + boxes[jj][2] <= y_cor_x_min_main[ii] < boxes[jj][3]): arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break @@ -2184,10 +2182,8 @@ class Eynollah: for ii in range(len(cx_text_only_h)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only_h[ii] + 80 >= boxes[jj][0] and - x_min_text_only_h[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main_h[ii] >= boxes[jj][2] and - y_cor_x_min_main_h[ii] < boxes[jj][3]): + if (boxes[jj][0] <= x_min_text_only_h[ii] + 80 < boxes[jj][1] and + boxes[jj][2] <= y_cor_x_min_main_h[ii] < boxes[jj][3]): arg_text_con_h.append(jj) check_if_textregion_located_in_a_box = True break @@ -2263,10 +2259,8 @@ class Eynollah: for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (cx_text_only[ii] >= boxes[jj][0] and - cx_text_only[ii] < boxes[jj][1] and - cy_text_only[ii] >= boxes[jj][2] and - cy_text_only[ii] < boxes[jj][3]): + if (boxes[jj][0] <= cx_text_only[ii] < boxes[jj][1] and + boxes[jj][2] <= cy_text_only[ii] < boxes[jj][3]): # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) check_if_textregion_located_in_a_box = True @@ -2287,10 +2281,8 @@ class Eynollah: for ii in range(len(cx_text_only_h)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (cx_text_only_h[ii] >= boxes[jj][0] and - cx_text_only_h[ii] < boxes[jj][1] and - cy_text_only_h[ii] >= boxes[jj][2] and - cy_text_only_h[ii] < boxes[jj][3]): + if (boxes[jj][0] <= cx_text_only_h[ii] < boxes[jj][1] and + boxes[jj][2] <= cy_text_only_h[ii] < boxes[jj][3]): # this is valid if the center of region identify in which box it is located arg_text_con_h.append(jj) check_if_textregion_located_in_a_box = True @@ -2375,10 +2367,8 @@ class Eynollah: for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80 >= boxes[jj][0] and - x_min_text_only[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main[ii] >= boxes[jj][2] and - y_cor_x_min_main[ii] < boxes[jj][3]): + if (boxes[jj][0] <= x_min_text_only[ii] + 80 < boxes[jj][1] and + boxes[jj][2] <= y_cor_x_min_main[ii] < boxes[jj][3]): arg_text_con.append(jj) check_if_textregion_located_in_a_box = True break @@ -2437,10 +2427,8 @@ class Eynollah: for ii in range(len(cx_text_only)): check_if_textregion_located_in_a_box = False for jj in range(len(boxes)): - if (cx_text_only[ii] >= boxes[jj][0] and - cx_text_only[ii] < boxes[jj][1] and - cy_text_only[ii] >= boxes[jj][2] and - cy_text_only[ii] < boxes[jj][3]): + if (boxes[jj][0] <= cx_text_only[ii] < boxes[jj][1] and + boxes[jj][2] <= cy_text_only[ii] < boxes[jj][3]): # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) check_if_textregion_located_in_a_box = True @@ -2746,7 +2734,7 @@ class Eynollah: prediction_table = prediction_table.astype(np.int16) return prediction_table[:,:,0] else: - if num_col_classifier < 4 and num_col_classifier > 2: + if 4 > num_col_classifier > 2: prediction_table = self.do_prediction(patches, img, self.model_table) pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table) pre_updown = cv2.flip(pre_updown, -1) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 6ba445d..35fdf5f 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -519,12 +519,10 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl peaks_neg_true[1] > p_m)): num_col = 1 peaks_neg_true = [] - elif (peaks_neg_true[0] < p_g_u and - peaks_neg_true[0] > p_g_l and + elif (p_g_u > peaks_neg_true[0] > p_g_l and peaks_neg_true[1] > p_u_quarter): peaks_neg_true = [peaks_neg_true[0]] - elif (peaks_neg_true[1] < p_g_u and - peaks_neg_true[1] > p_g_l and + elif (p_g_u > peaks_neg_true[1] > p_g_l and peaks_neg_true[0] < p_quarter): peaks_neg_true = [peaks_neg_true[1]] @@ -686,12 +684,10 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): peaks_neg_true[1] > p_m)): num_col = 1 peaks_neg_true = [] - elif (peaks_neg_true[0] < p_g_u and - peaks_neg_true[0] > p_g_l and + elif (p_g_u > peaks_neg_true[0] > p_g_l and peaks_neg_true[1] > p_u_quarter): peaks_neg_true = [peaks_neg_true[0]] - elif (peaks_neg_true[1] < p_g_u and - peaks_neg_true[1] > p_g_l and + elif (p_g_u > peaks_neg_true[1] > p_g_l and peaks_neg_true[0] < p_quarter): peaks_neg_true = [peaks_neg_true[1]] else: @@ -2046,8 +2042,7 @@ def return_boxes_of_images_by_order_of_reading_new( for yic in range(len(y_copy)): #print('burda') if (y_copy[yic]>y_itself and - column>=x_start_copy[yic] and - column<=x_end_copy[yic]): + x_start_copy[yic] <= column <= x_end_copy[yic]): y_in_cols.append(y_copy[yic]) #print('burda2') #print(y_in_cols,'y_in_cols') @@ -2138,8 +2133,7 @@ def return_boxes_of_images_by_order_of_reading_new( for yic in range(len(y_copy)): #print('burda') if (y_copy[yic]>y_itself and - column>=x_start_copy[yic] and - column<=x_end_copy[yic]): + x_start_copy[yic] <= column <= x_end_copy[yic]): y_in_cols.append(y_copy[yic]) #print('burda2') #print(y_in_cols,'y_in_cols') diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index be00db0..009cee7 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -45,8 +45,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area polygon = geometry.Polygon([point[0] for point in c]) area = polygon.area - if (area >= min_area * np.prod(image.shape[:2]) and - area <= max_area * np.prod(image.shape[:2]) and + if (min_area * np.prod(image.shape[:2]) <= area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1): found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint)) @@ -64,8 +63,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m ##print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area # print(hierarchy[0][jv][3],hierarchy ) - if (area >= min_area * np.prod(image.shape[:2]) and - area <= max_area * np.prod(image.shape[:2]) and + if (min_area * np.prod(image.shape[:2]) <= area <= max_area * np.prod(image.shape[:2]) and # hierarchy[0][jv][3]==-1 True): # print(c[0][0][1]) diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index bacae94..cceb70b 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -18,7 +18,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve if text_with_lines.shape[0]<=1500: pass - elif text_with_lines.shape[0]>1500 and text_with_lines.shape[0]<=1800: + elif 1500 < text_with_lines.shape[0] <= 1800: text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1]) text_with_lines=cv2.erode(text_with_lines,kernel,iterations=5) text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) @@ -37,7 +37,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve if thickness_along_y_percent<30: min_textline_thickness=8 - elif thickness_along_y_percent>=30 and thickness_along_y_percent<50: + elif 30 <= thickness_along_y_percent < 50: min_textline_thickness=20 else: min_textline_thickness=40 diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index 26e1ebe..a32933d 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -1144,7 +1144,7 @@ def separate_lines_new_inside_tiles(img_path, thetha): x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - if len(peaks_real) <= 2 and len(peaks_real) > 1: + if 2 >= len(peaks_real) > 1: sigma_gaus = 10 else: sigma_gaus = 5 @@ -1445,7 +1445,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl except: slope_xline = 0 - if abs(slope_region) < 25 and abs(slope_xline) > 25: + if abs(slope_region) < 25 < abs(slope_xline): slope_xline = [slope_region][0] # if abs(slope_region)>70 and abs(slope_xline)<25: # slope_xline=[slope_region][0]