mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-09 20:29:55 +02:00
simplify chained comparisons
This commit is contained in:
parent
b9030f5203
commit
2722a9a464
5 changed files with 30 additions and 50 deletions
|
@ -513,31 +513,31 @@ class Eynollah:
|
||||||
img_w_new = 2000
|
img_w_new = 2000
|
||||||
elif num_col == 1 and width_early >= 2500:
|
elif num_col == 1 and width_early >= 2500:
|
||||||
img_w_new = 2000
|
img_w_new = 2000
|
||||||
elif num_col == 1 and width_early >= 1100 and width_early < 2500:
|
elif num_col == 1 and 1100 <= width_early < 2500:
|
||||||
img_w_new = width_early
|
img_w_new = width_early
|
||||||
elif num_col == 2 and width_early < 2000:
|
elif num_col == 2 and width_early < 2000:
|
||||||
img_w_new = 2400
|
img_w_new = 2400
|
||||||
elif num_col == 2 and width_early >= 3500:
|
elif num_col == 2 and width_early >= 3500:
|
||||||
img_w_new = 2400
|
img_w_new = 2400
|
||||||
elif num_col == 2 and width_early >= 2000 and width_early < 3500:
|
elif num_col == 2 and 2000 <= width_early < 3500:
|
||||||
img_w_new = width_early
|
img_w_new = width_early
|
||||||
elif num_col == 3 and width_early < 2000:
|
elif num_col == 3 and width_early < 2000:
|
||||||
img_w_new = 3000
|
img_w_new = 3000
|
||||||
elif num_col == 3 and width_early >= 4000:
|
elif num_col == 3 and width_early >= 4000:
|
||||||
img_w_new = 3000
|
img_w_new = 3000
|
||||||
elif num_col == 3 and width_early >= 2000 and width_early < 4000:
|
elif num_col == 3 and 2000 <= width_early < 4000:
|
||||||
img_w_new = width_early
|
img_w_new = width_early
|
||||||
elif num_col == 4 and width_early < 2500:
|
elif num_col == 4 and width_early < 2500:
|
||||||
img_w_new = 4000
|
img_w_new = 4000
|
||||||
elif num_col == 4 and width_early >= 5000:
|
elif num_col == 4 and width_early >= 5000:
|
||||||
img_w_new = 4000
|
img_w_new = 4000
|
||||||
elif num_col == 4 and width_early >= 2500 and width_early < 5000:
|
elif num_col == 4 and 2500 <= width_early < 5000:
|
||||||
img_w_new = width_early
|
img_w_new = width_early
|
||||||
elif num_col == 5 and width_early < 3700:
|
elif num_col == 5 and width_early < 3700:
|
||||||
img_w_new = 5000
|
img_w_new = 5000
|
||||||
elif num_col == 5 and width_early >= 7000:
|
elif num_col == 5 and width_early >= 7000:
|
||||||
img_w_new = 5000
|
img_w_new = 5000
|
||||||
elif num_col == 5 and width_early >= 3700 and width_early < 7000:
|
elif num_col == 5 and 3700 <= width_early < 7000:
|
||||||
img_w_new = width_early
|
img_w_new = width_early
|
||||||
elif num_col == 6 and width_early < 4500:
|
elif num_col == 6 and width_early < 4500:
|
||||||
img_w_new = 6500 # 5400
|
img_w_new = 6500 # 5400
|
||||||
|
@ -2166,10 +2166,8 @@ class Eynollah:
|
||||||
for ii in range(len(cx_text_only)):
|
for ii in range(len(cx_text_only)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj in range(len(boxes)):
|
for jj in range(len(boxes)):
|
||||||
if (x_min_text_only[ii] + 80 >= boxes[jj][0] and
|
if (boxes[jj][0] <= x_min_text_only[ii] + 80 < boxes[jj][1] and
|
||||||
x_min_text_only[ii] + 80 < boxes[jj][1] and
|
boxes[jj][2] <= y_cor_x_min_main[ii] < boxes[jj][3]):
|
||||||
y_cor_x_min_main[ii] >= boxes[jj][2] and
|
|
||||||
y_cor_x_min_main[ii] < boxes[jj][3]):
|
|
||||||
arg_text_con.append(jj)
|
arg_text_con.append(jj)
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
|
@ -2184,10 +2182,8 @@ class Eynollah:
|
||||||
for ii in range(len(cx_text_only_h)):
|
for ii in range(len(cx_text_only_h)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj in range(len(boxes)):
|
for jj in range(len(boxes)):
|
||||||
if (x_min_text_only_h[ii] + 80 >= boxes[jj][0] and
|
if (boxes[jj][0] <= x_min_text_only_h[ii] + 80 < boxes[jj][1] and
|
||||||
x_min_text_only_h[ii] + 80 < boxes[jj][1] and
|
boxes[jj][2] <= y_cor_x_min_main_h[ii] < boxes[jj][3]):
|
||||||
y_cor_x_min_main_h[ii] >= boxes[jj][2] and
|
|
||||||
y_cor_x_min_main_h[ii] < boxes[jj][3]):
|
|
||||||
arg_text_con_h.append(jj)
|
arg_text_con_h.append(jj)
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
|
@ -2263,10 +2259,8 @@ class Eynollah:
|
||||||
for ii in range(len(cx_text_only)):
|
for ii in range(len(cx_text_only)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj in range(len(boxes)):
|
for jj in range(len(boxes)):
|
||||||
if (cx_text_only[ii] >= boxes[jj][0] and
|
if (boxes[jj][0] <= cx_text_only[ii] < boxes[jj][1] and
|
||||||
cx_text_only[ii] < boxes[jj][1] and
|
boxes[jj][2] <= cy_text_only[ii] < boxes[jj][3]):
|
||||||
cy_text_only[ii] >= boxes[jj][2] and
|
|
||||||
cy_text_only[ii] < boxes[jj][3]):
|
|
||||||
# this is valid if the center of region identify in which box it is located
|
# this is valid if the center of region identify in which box it is located
|
||||||
arg_text_con.append(jj)
|
arg_text_con.append(jj)
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
|
@ -2287,10 +2281,8 @@ class Eynollah:
|
||||||
for ii in range(len(cx_text_only_h)):
|
for ii in range(len(cx_text_only_h)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj in range(len(boxes)):
|
for jj in range(len(boxes)):
|
||||||
if (cx_text_only_h[ii] >= boxes[jj][0] and
|
if (boxes[jj][0] <= cx_text_only_h[ii] < boxes[jj][1] and
|
||||||
cx_text_only_h[ii] < boxes[jj][1] and
|
boxes[jj][2] <= cy_text_only_h[ii] < boxes[jj][3]):
|
||||||
cy_text_only_h[ii] >= boxes[jj][2] and
|
|
||||||
cy_text_only_h[ii] < boxes[jj][3]):
|
|
||||||
# this is valid if the center of region identify in which box it is located
|
# this is valid if the center of region identify in which box it is located
|
||||||
arg_text_con_h.append(jj)
|
arg_text_con_h.append(jj)
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
|
@ -2375,10 +2367,8 @@ class Eynollah:
|
||||||
for ii in range(len(cx_text_only)):
|
for ii in range(len(cx_text_only)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj in range(len(boxes)):
|
for jj in range(len(boxes)):
|
||||||
if (x_min_text_only[ii] + 80 >= boxes[jj][0] and
|
if (boxes[jj][0] <= x_min_text_only[ii] + 80 < boxes[jj][1] and
|
||||||
x_min_text_only[ii] + 80 < boxes[jj][1] and
|
boxes[jj][2] <= y_cor_x_min_main[ii] < boxes[jj][3]):
|
||||||
y_cor_x_min_main[ii] >= boxes[jj][2] and
|
|
||||||
y_cor_x_min_main[ii] < boxes[jj][3]):
|
|
||||||
arg_text_con.append(jj)
|
arg_text_con.append(jj)
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
break
|
break
|
||||||
|
@ -2437,10 +2427,8 @@ class Eynollah:
|
||||||
for ii in range(len(cx_text_only)):
|
for ii in range(len(cx_text_only)):
|
||||||
check_if_textregion_located_in_a_box = False
|
check_if_textregion_located_in_a_box = False
|
||||||
for jj in range(len(boxes)):
|
for jj in range(len(boxes)):
|
||||||
if (cx_text_only[ii] >= boxes[jj][0] and
|
if (boxes[jj][0] <= cx_text_only[ii] < boxes[jj][1] and
|
||||||
cx_text_only[ii] < boxes[jj][1] and
|
boxes[jj][2] <= cy_text_only[ii] < boxes[jj][3]):
|
||||||
cy_text_only[ii] >= boxes[jj][2] and
|
|
||||||
cy_text_only[ii] < boxes[jj][3]):
|
|
||||||
# this is valid if the center of region identify in which box it is located
|
# this is valid if the center of region identify in which box it is located
|
||||||
arg_text_con.append(jj)
|
arg_text_con.append(jj)
|
||||||
check_if_textregion_located_in_a_box = True
|
check_if_textregion_located_in_a_box = True
|
||||||
|
@ -2746,7 +2734,7 @@ class Eynollah:
|
||||||
prediction_table = prediction_table.astype(np.int16)
|
prediction_table = prediction_table.astype(np.int16)
|
||||||
return prediction_table[:,:,0]
|
return prediction_table[:,:,0]
|
||||||
else:
|
else:
|
||||||
if num_col_classifier < 4 and num_col_classifier > 2:
|
if 4 > num_col_classifier > 2:
|
||||||
prediction_table = self.do_prediction(patches, img, self.model_table)
|
prediction_table = self.do_prediction(patches, img, self.model_table)
|
||||||
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table)
|
pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), self.model_table)
|
||||||
pre_updown = cv2.flip(pre_updown, -1)
|
pre_updown = cv2.flip(pre_updown, -1)
|
||||||
|
|
|
@ -519,12 +519,10 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
|
||||||
peaks_neg_true[1] > p_m)):
|
peaks_neg_true[1] > p_m)):
|
||||||
num_col = 1
|
num_col = 1
|
||||||
peaks_neg_true = []
|
peaks_neg_true = []
|
||||||
elif (peaks_neg_true[0] < p_g_u and
|
elif (p_g_u > peaks_neg_true[0] > p_g_l and
|
||||||
peaks_neg_true[0] > p_g_l and
|
|
||||||
peaks_neg_true[1] > p_u_quarter):
|
peaks_neg_true[1] > p_u_quarter):
|
||||||
peaks_neg_true = [peaks_neg_true[0]]
|
peaks_neg_true = [peaks_neg_true[0]]
|
||||||
elif (peaks_neg_true[1] < p_g_u and
|
elif (p_g_u > peaks_neg_true[1] > p_g_l and
|
||||||
peaks_neg_true[1] > p_g_l and
|
|
||||||
peaks_neg_true[0] < p_quarter):
|
peaks_neg_true[0] < p_quarter):
|
||||||
peaks_neg_true = [peaks_neg_true[1]]
|
peaks_neg_true = [peaks_neg_true[1]]
|
||||||
|
|
||||||
|
@ -686,12 +684,10 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
|
||||||
peaks_neg_true[1] > p_m)):
|
peaks_neg_true[1] > p_m)):
|
||||||
num_col = 1
|
num_col = 1
|
||||||
peaks_neg_true = []
|
peaks_neg_true = []
|
||||||
elif (peaks_neg_true[0] < p_g_u and
|
elif (p_g_u > peaks_neg_true[0] > p_g_l and
|
||||||
peaks_neg_true[0] > p_g_l and
|
|
||||||
peaks_neg_true[1] > p_u_quarter):
|
peaks_neg_true[1] > p_u_quarter):
|
||||||
peaks_neg_true = [peaks_neg_true[0]]
|
peaks_neg_true = [peaks_neg_true[0]]
|
||||||
elif (peaks_neg_true[1] < p_g_u and
|
elif (p_g_u > peaks_neg_true[1] > p_g_l and
|
||||||
peaks_neg_true[1] > p_g_l and
|
|
||||||
peaks_neg_true[0] < p_quarter):
|
peaks_neg_true[0] < p_quarter):
|
||||||
peaks_neg_true = [peaks_neg_true[1]]
|
peaks_neg_true = [peaks_neg_true[1]]
|
||||||
else:
|
else:
|
||||||
|
@ -2046,8 +2042,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
for yic in range(len(y_copy)):
|
for yic in range(len(y_copy)):
|
||||||
#print('burda')
|
#print('burda')
|
||||||
if (y_copy[yic]>y_itself and
|
if (y_copy[yic]>y_itself and
|
||||||
column>=x_start_copy[yic] and
|
x_start_copy[yic] <= column <= x_end_copy[yic]):
|
||||||
column<=x_end_copy[yic]):
|
|
||||||
y_in_cols.append(y_copy[yic])
|
y_in_cols.append(y_copy[yic])
|
||||||
#print('burda2')
|
#print('burda2')
|
||||||
#print(y_in_cols,'y_in_cols')
|
#print(y_in_cols,'y_in_cols')
|
||||||
|
@ -2138,8 +2133,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
for yic in range(len(y_copy)):
|
for yic in range(len(y_copy)):
|
||||||
#print('burda')
|
#print('burda')
|
||||||
if (y_copy[yic]>y_itself and
|
if (y_copy[yic]>y_itself and
|
||||||
column>=x_start_copy[yic] and
|
x_start_copy[yic] <= column <= x_end_copy[yic]):
|
||||||
column<=x_end_copy[yic]):
|
|
||||||
y_in_cols.append(y_copy[yic])
|
y_in_cols.append(y_copy[yic])
|
||||||
#print('burda2')
|
#print('burda2')
|
||||||
#print(y_in_cols,'y_in_cols')
|
#print(y_in_cols,'y_in_cols')
|
||||||
|
|
|
@ -45,8 +45,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area
|
||||||
|
|
||||||
polygon = geometry.Polygon([point[0] for point in c])
|
polygon = geometry.Polygon([point[0] for point in c])
|
||||||
area = polygon.area
|
area = polygon.area
|
||||||
if (area >= min_area * np.prod(image.shape[:2]) and
|
if (min_area * np.prod(image.shape[:2]) <= area <= max_area * np.prod(image.shape[:2]) and
|
||||||
area <= max_area * np.prod(image.shape[:2]) and
|
|
||||||
hierarchy[0][jv][3] == -1):
|
hierarchy[0][jv][3] == -1):
|
||||||
found_polygons_early.append(np.array([[point]
|
found_polygons_early.append(np.array([[point]
|
||||||
for point in polygon.exterior.coords], dtype=np.uint))
|
for point in polygon.exterior.coords], dtype=np.uint))
|
||||||
|
@ -64,8 +63,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
|
||||||
##print(np.prod(thresh.shape[:2]))
|
##print(np.prod(thresh.shape[:2]))
|
||||||
# Check that polygon has area greater than minimal area
|
# Check that polygon has area greater than minimal area
|
||||||
# print(hierarchy[0][jv][3],hierarchy )
|
# print(hierarchy[0][jv][3],hierarchy )
|
||||||
if (area >= min_area * np.prod(image.shape[:2]) and
|
if (min_area * np.prod(image.shape[:2]) <= area <= max_area * np.prod(image.shape[:2]) and
|
||||||
area <= max_area * np.prod(image.shape[:2]) and
|
|
||||||
# hierarchy[0][jv][3]==-1
|
# hierarchy[0][jv][3]==-1
|
||||||
True):
|
True):
|
||||||
# print(c[0][0][1])
|
# print(c[0][0][1])
|
||||||
|
|
|
@ -18,7 +18,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
|
||||||
|
|
||||||
if text_with_lines.shape[0]<=1500:
|
if text_with_lines.shape[0]<=1500:
|
||||||
pass
|
pass
|
||||||
elif text_with_lines.shape[0]>1500 and text_with_lines.shape[0]<=1800:
|
elif 1500 < text_with_lines.shape[0] <= 1800:
|
||||||
text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1])
|
text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1])
|
||||||
text_with_lines=cv2.erode(text_with_lines,kernel,iterations=5)
|
text_with_lines=cv2.erode(text_with_lines,kernel,iterations=5)
|
||||||
text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])
|
text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])
|
||||||
|
@ -37,7 +37,7 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_ve
|
||||||
|
|
||||||
if thickness_along_y_percent<30:
|
if thickness_along_y_percent<30:
|
||||||
min_textline_thickness=8
|
min_textline_thickness=8
|
||||||
elif thickness_along_y_percent>=30 and thickness_along_y_percent<50:
|
elif 30 <= thickness_along_y_percent < 50:
|
||||||
min_textline_thickness=20
|
min_textline_thickness=20
|
||||||
else:
|
else:
|
||||||
min_textline_thickness=40
|
min_textline_thickness=40
|
||||||
|
|
|
@ -1144,7 +1144,7 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
||||||
x = np.array(range(len(y)))
|
x = np.array(range(len(y)))
|
||||||
|
|
||||||
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
|
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0)
|
||||||
if len(peaks_real) <= 2 and len(peaks_real) > 1:
|
if 2 >= len(peaks_real) > 1:
|
||||||
sigma_gaus = 10
|
sigma_gaus = 10
|
||||||
else:
|
else:
|
||||||
sigma_gaus = 5
|
sigma_gaus = 5
|
||||||
|
@ -1445,7 +1445,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
|
||||||
except:
|
except:
|
||||||
slope_xline = 0
|
slope_xline = 0
|
||||||
|
|
||||||
if abs(slope_region) < 25 and abs(slope_xline) > 25:
|
if abs(slope_region) < 25 < abs(slope_xline):
|
||||||
slope_xline = [slope_region][0]
|
slope_xline = [slope_region][0]
|
||||||
# if abs(slope_region)>70 and abs(slope_xline)<25:
|
# if abs(slope_region)>70 and abs(slope_xline)<25:
|
||||||
# slope_xline=[slope_region][0]
|
# slope_xline=[slope_region][0]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue