mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-07 06:59:58 +02:00
return_boxes_of_images_by_order_of_reading_new: simplify, avoid changing dtype during np.append
This commit is contained in:
parent
09ece86f0d
commit
b48c41e68f
2 changed files with 97 additions and 119 deletions
|
@@ -3678,7 +3678,7 @@ class Eynollah:
|
||||||
for region in all_found_textline_polygons]
|
for region in all_found_textline_polygons]
|
||||||
|
|
||||||
def dilate_textregions_contours(self, all_found_textline_polygons):
|
def dilate_textregions_contours(self, all_found_textline_polygons):
|
||||||
return [np.array(make_valid(Polygon(poly[:, 0])).buffer(5).exterior.coords,
|
return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords,
|
||||||
dtype=int)[:, np.newaxis]
|
dtype=int)[:, np.newaxis]
|
||||||
for poly in all_found_textline_polygons]
|
for poly in all_found_textline_polygons]
|
||||||
|
|
||||||
|
|
|
@@ -1632,6 +1632,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
regions_without_separators = cv2.flip(regions_without_separators,1)
|
regions_without_separators = cv2.flip(regions_without_separators,1)
|
||||||
boxes=[]
|
boxes=[]
|
||||||
peaks_neg_tot_tables = []
|
peaks_neg_tot_tables = []
|
||||||
|
splitter_y_new = np.array(splitter_y_new, dtype=int)
|
||||||
for i in range(len(splitter_y_new)-1):
|
for i in range(len(splitter_y_new)-1):
|
||||||
#print(splitter_y_new[i],splitter_y_new[i+1])
|
#print(splitter_y_new[i],splitter_y_new[i+1])
|
||||||
matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) &
|
matrix_new = matrix_of_lines_ch[:,:][(matrix_of_lines_ch[:,6]> splitter_y_new[i] ) &
|
||||||
|
@@ -1644,14 +1645,9 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
# 0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))):
|
# 0.1 * (np.abs(splitter_y_new[i+1]-splitter_y_new[i]))):
|
||||||
if True:
|
if True:
|
||||||
try:
|
try:
|
||||||
if erosion_hurts:
|
num_col, peaks_neg_fin = find_num_col(
|
||||||
num_col, peaks_neg_fin = find_num_col(
|
regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
|
||||||
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
|
num_col_classifier, tables, multiplier=6. if erosion_hurts else 7.)
|
||||||
num_col_classifier, tables, multiplier=6.)
|
|
||||||
else:
|
|
||||||
num_col, peaks_neg_fin = find_num_col(
|
|
||||||
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
|
|
||||||
num_col_classifier, tables, multiplier=7.)
|
|
||||||
except:
|
except:
|
||||||
peaks_neg_fin=[]
|
peaks_neg_fin=[]
|
||||||
num_col = 0
|
num_col = 0
|
||||||
|
@@ -1661,7 +1657,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
#print('burda')
|
#print('burda')
|
||||||
if len(peaks_neg_fin)==0:
|
if len(peaks_neg_fin)==0:
|
||||||
num_col, peaks_neg_fin = find_num_col(
|
num_col, peaks_neg_fin = find_num_col(
|
||||||
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
|
regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1], :],
|
||||||
num_col_classifier, tables, multiplier=3.)
|
num_col_classifier, tables, multiplier=3.)
|
||||||
peaks_neg_fin_early=[]
|
peaks_neg_fin_early=[]
|
||||||
peaks_neg_fin_early.append(0)
|
peaks_neg_fin_early.append(0)
|
||||||
|
@@ -1674,21 +1670,21 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
peaks_neg_fin_rev=[]
|
peaks_neg_fin_rev=[]
|
||||||
for i_n in range(len(peaks_neg_fin_early)-1):
|
for i_n in range(len(peaks_neg_fin_early)-1):
|
||||||
#print(i_n,'i_n')
|
#print(i_n,'i_n')
|
||||||
#plt.plot(regions_without_separators[int(splitter_y_new[i]):
|
#plt.plot(regions_without_separators[splitter_y_new[i]:
|
||||||
# int(splitter_y_new[i+1]),
|
# splitter_y_new[i+1],
|
||||||
# peaks_neg_fin_early[i_n]:
|
# peaks_neg_fin_early[i_n]:
|
||||||
# peaks_neg_fin_early[i_n+1]].sum(axis=0) )
|
# peaks_neg_fin_early[i_n+1]].sum(axis=0) )
|
||||||
#plt.show()
|
#plt.show()
|
||||||
try:
|
try:
|
||||||
num_col, peaks_neg_fin1 = find_num_col(
|
num_col, peaks_neg_fin1 = find_num_col(
|
||||||
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
|
regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
|
||||||
peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
|
peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
|
||||||
num_col_classifier,tables, multiplier=7.)
|
num_col_classifier,tables, multiplier=7.)
|
||||||
except:
|
except:
|
||||||
peaks_neg_fin1=[]
|
peaks_neg_fin1=[]
|
||||||
try:
|
try:
|
||||||
num_col, peaks_neg_fin2 = find_num_col(
|
num_col, peaks_neg_fin2 = find_num_col(
|
||||||
regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),
|
regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],
|
||||||
peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
|
peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],
|
||||||
num_col_classifier,tables, multiplier=5.)
|
num_col_classifier,tables, multiplier=5.)
|
||||||
except:
|
except:
|
||||||
|
@@ -1716,7 +1712,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
#num_col, peaks_neg_fin = find_num_col(
|
#num_col, peaks_neg_fin = find_num_col(
|
||||||
# regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],
|
# regions_without_separators[splitter_y_new[i]:splitter_y_new[i+1],:],
|
||||||
# multiplier=7.0)
|
# multiplier=7.0)
|
||||||
x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
|
x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
|
||||||
x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
|
x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
|
||||||
|
@@ -1738,31 +1734,28 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
|
y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
|
||||||
new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
|
new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
|
||||||
x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff)
|
x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff)
|
||||||
x_starting = np.array(x_starting)
|
|
||||||
x_ending = np.array(x_ending)
|
|
||||||
y_type_2 = np.array(y_type_2)
|
|
||||||
y_diff_type_2 = np.array(y_diff_type_2)
|
|
||||||
|
|
||||||
|
all_columns = set(range(len(peaks_neg_tot) - 1))
|
||||||
if ((reading_order_type==1) or
|
if ((reading_order_type==1) or
|
||||||
(reading_order_type==0 and
|
(reading_order_type==0 and
|
||||||
(len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))):
|
(len(y_lines_without_mother)>=2 or there_is_sep_with_child==1))):
|
||||||
try:
|
try:
|
||||||
y_grenze=int(splitter_y_new[i])+300
|
y_grenze = splitter_y_new[i] + 300
|
||||||
#check if there is a big separator in this y_mains_sep_ohne_grenzen
|
#check if there is a big separator in this y_mains_sep_ohne_grenzen
|
||||||
|
|
||||||
args_early_ys=np.arange(len(y_type_2))
|
args_early_ys=np.arange(len(y_type_2))
|
||||||
#print(args_early_ys,'args_early_ys')
|
#print(args_early_ys,'args_early_ys')
|
||||||
#print(int(splitter_y_new[i]),int(splitter_y_new[i+1]))
|
#print(splitter_y_new[i], splitter_y_new[i+1])
|
||||||
|
|
||||||
x_starting_up = x_starting[(y_type_2 > int(splitter_y_new[i])) &
|
x_starting_up = x_starting[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
x_ending_up = x_ending[(y_type_2 > int(splitter_y_new[i])) &
|
x_ending_up = x_ending[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
y_type_2_up = y_type_2[(y_type_2 > int(splitter_y_new[i])) &
|
y_type_2_up = y_type_2[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
y_diff_type_2_up = y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
|
y_diff_type_2_up = y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
args_up = args_early_ys[(y_type_2 > int(splitter_y_new[i])) &
|
args_up = args_early_ys[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
if len(y_type_2_up) > 0:
|
if len(y_type_2_up) > 0:
|
||||||
y_main_separator_up = y_type_2_up [(x_starting_up==0) &
|
y_main_separator_up = y_type_2_up [(x_starting_up==0) &
|
||||||
|
@@ -1776,8 +1769,8 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
|
args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
|
||||||
#print(args_to_be_kept,'args_to_be_kept')
|
#print(args_to_be_kept,'args_to_be_kept')
|
||||||
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
|
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
|
||||||
int(splitter_y_new[i]), int( np.max(y_diff_main_separator_up))])
|
splitter_y_new[i], y_diff_main_separator_up.max()])
|
||||||
splitter_y_new[i]=[ np.max(y_diff_main_separator_up) ][0]
|
splitter_y_new[i] = y_diff_main_separator_up.max()
|
||||||
|
|
||||||
#print(splitter_y_new[i],'splitter_y_new[i]')
|
#print(splitter_y_new[i],'splitter_y_new[i]')
|
||||||
y_type_2 = y_type_2[args_to_be_kept]
|
y_type_2 = y_type_2[args_to_be_kept]
|
||||||
|
@@ -1786,29 +1779,28 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
y_diff_type_2 = y_diff_type_2[args_to_be_kept]
|
y_diff_type_2 = y_diff_type_2[args_to_be_kept]
|
||||||
|
|
||||||
#print('galdiha')
|
#print('galdiha')
|
||||||
y_grenze=int(splitter_y_new[i])+200
|
y_grenze = splitter_y_new[i] + 200
|
||||||
args_early_ys2=np.arange(len(y_type_2))
|
args_early_ys2=np.arange(len(y_type_2))
|
||||||
y_type_2_up=y_type_2[(y_type_2 > int(splitter_y_new[i])) &
|
y_type_2_up=y_type_2[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
x_starting_up=x_starting[(y_type_2 > int(splitter_y_new[i])) &
|
x_starting_up=x_starting[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
x_ending_up=x_ending[(y_type_2 > int(splitter_y_new[i])) &
|
x_ending_up=x_ending[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
y_diff_type_2_up=y_diff_type_2[(y_type_2 > int(splitter_y_new[i])) &
|
y_diff_type_2_up=y_diff_type_2[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
args_up2=args_early_ys2[(y_type_2 > int(splitter_y_new[i])) &
|
args_up2=args_early_ys2[(y_type_2 > splitter_y_new[i]) &
|
||||||
(y_type_2 <= y_grenze)]
|
(y_type_2 <= y_grenze)]
|
||||||
#print(y_type_2_up,x_starting_up,x_ending_up,'didid')
|
#print(y_type_2_up,x_starting_up,x_ending_up,'didid')
|
||||||
nodes_in = []
|
nodes_in = set()
|
||||||
for ij in range(len(x_starting_up)):
|
for ij in range(len(x_starting_up)):
|
||||||
nodes_in = nodes_in + list(range(x_starting_up[ij],
|
nodes_in.update(range(x_starting_up[ij],
|
||||||
x_ending_up[ij]))
|
x_ending_up[ij]))
|
||||||
nodes_in = np.unique(nodes_in)
|
|
||||||
#print(nodes_in,'nodes_in')
|
#print(nodes_in,'nodes_in')
|
||||||
|
|
||||||
if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
|
if nodes_in == set(range(len(peaks_neg_tot)-1)):
|
||||||
pass
|
pass
|
||||||
elif set(nodes_in)==set(range(1, len(peaks_neg_tot)-1)):
|
elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
#print('burdaydikh')
|
#print('burdaydikh')
|
||||||
|
@@ -1823,17 +1815,16 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
pass
|
pass
|
||||||
#print('burdaydikh2')
|
#print('burdaydikh2')
|
||||||
elif len(y_diff_main_separator_up)==0:
|
elif len(y_diff_main_separator_up)==0:
|
||||||
nodes_in = []
|
nodes_in = set()
|
||||||
for ij in range(len(x_starting_up)):
|
for ij in range(len(x_starting_up)):
|
||||||
nodes_in = nodes_in + list(range(x_starting_up[ij],
|
nodes_in.update(range(x_starting_up[ij],
|
||||||
x_ending_up[ij]))
|
x_ending_up[ij]))
|
||||||
nodes_in = np.unique(nodes_in)
|
|
||||||
#print(nodes_in,'nodes_in2')
|
#print(nodes_in,'nodes_in2')
|
||||||
#print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
|
#print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
|
||||||
|
|
||||||
if set(nodes_in)==set(range(len(peaks_neg_tot)-1)):
|
if nodes_in == set(range(len(peaks_neg_tot)-1)):
|
||||||
pass
|
pass
|
||||||
elif set(nodes_in)==set(range(1,len(peaks_neg_tot)-1)):
|
elif nodes_in == set(range(1,len(peaks_neg_tot)-1)):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
#print('burdaydikh')
|
#print('burdaydikh')
|
||||||
|
@@ -1858,26 +1849,24 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
x_end_by_order=[]
|
x_end_by_order=[]
|
||||||
if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1:
|
if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1:
|
||||||
if reading_order_type==1:
|
if reading_order_type==1:
|
||||||
y_lines_by_order.append(int(splitter_y_new[i]))
|
y_lines_by_order.append(splitter_y_new[i])
|
||||||
x_start_by_order.append(0)
|
x_start_by_order.append(0)
|
||||||
x_end_by_order.append(len(peaks_neg_tot)-2)
|
x_end_by_order.append(len(peaks_neg_tot)-2)
|
||||||
else:
|
else:
|
||||||
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
|
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
|
||||||
columns_covered_by_mothers = []
|
columns_covered_by_mothers = set()
|
||||||
for dj in range(len(x_start_without_mother)):
|
for dj in range(len(x_start_without_mother)):
|
||||||
columns_covered_by_mothers = columns_covered_by_mothers + \
|
columns_covered_by_mothers.update(
|
||||||
list(range(x_start_without_mother[dj],
|
range(x_start_without_mother[dj],
|
||||||
x_end_without_mother[dj]))
|
x_end_without_mother[dj]))
|
||||||
columns_covered_by_mothers = list(set(columns_covered_by_mothers))
|
columns_not_covered = list(all_columns - columns_covered_by_mothers)
|
||||||
|
y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother),
|
||||||
all_columns=np.arange(len(peaks_neg_tot)-1)
|
dtype=int) * splitter_y_new[i])
|
||||||
columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
|
##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
|
||||||
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
|
|
||||||
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
|
||||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||||
x_starting = np.append(x_starting, columns_not_covered)
|
x_starting = np.append(x_starting, np.array(columns_not_covered, int))
|
||||||
x_starting = np.append(x_starting, x_start_without_mother)
|
x_starting = np.append(x_starting, x_start_without_mother)
|
||||||
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
|
x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
|
||||||
x_ending = np.append(x_ending, x_end_without_mother)
|
x_ending = np.append(x_ending, x_end_without_mother)
|
||||||
|
|
||||||
ind_args=np.arange(len(y_type_2))
|
ind_args=np.arange(len(y_type_2))
|
||||||
|
@@ -1906,39 +1895,34 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
x_end_by_order.append(x_end_column_sort[ii]-1)
|
x_end_by_order.append(x_end_column_sort[ii]-1)
|
||||||
else:
|
else:
|
||||||
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
|
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
|
||||||
columns_covered_by_mothers = []
|
columns_covered_by_mothers = set()
|
||||||
for dj in range(len(x_start_without_mother)):
|
for dj in range(len(x_start_without_mother)):
|
||||||
columns_covered_by_mothers = columns_covered_by_mothers + \
|
columns_covered_by_mothers.update(
|
||||||
list(range(x_start_without_mother[dj],
|
range(x_start_without_mother[dj],
|
||||||
x_end_without_mother[dj]))
|
x_end_without_mother[dj]))
|
||||||
columns_covered_by_mothers = list(set(columns_covered_by_mothers))
|
columns_not_covered = list(all_columns - columns_covered_by_mothers)
|
||||||
|
y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + len(x_start_without_mother),
|
||||||
all_columns=np.arange(len(peaks_neg_tot)-1)
|
dtype=int) * splitter_y_new[i])
|
||||||
columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
|
##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
|
||||||
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
|
|
||||||
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
|
||||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||||
x_starting = np.append(x_starting, columns_not_covered)
|
x_starting = np.append(x_starting, np.array(columns_not_covered, int))
|
||||||
x_starting = np.append(x_starting, x_start_without_mother)
|
x_starting = np.append(x_starting, x_start_without_mother)
|
||||||
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
|
x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
|
||||||
x_ending = np.append(x_ending, x_end_without_mother)
|
x_ending = np.append(x_ending, x_end_without_mother)
|
||||||
|
|
||||||
columns_covered_by_with_child_no_mothers = []
|
columns_covered_by_with_child_no_mothers = set()
|
||||||
for dj in range(len(x_end_with_child_without_mother)):
|
for dj in range(len(x_end_with_child_without_mother)):
|
||||||
columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \
|
columns_covered_by_with_child_no_mothers.update(
|
||||||
list(range(x_start_with_child_without_mother[dj],
|
range(x_start_with_child_without_mother[dj],
|
||||||
x_end_with_child_without_mother[dj]))
|
x_end_with_child_without_mother[dj]))
|
||||||
columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers))
|
columns_not_covered_child_no_mother = list(all_columns - columns_covered_by_with_child_no_mothers)
|
||||||
|
|
||||||
all_columns = np.arange(len(peaks_neg_tot)-1)
|
|
||||||
columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers))
|
|
||||||
#indexes_to_be_spanned=[]
|
#indexes_to_be_spanned=[]
|
||||||
for i_s in range(len(x_end_with_child_without_mother)):
|
for i_s in range(len(x_end_with_child_without_mother)):
|
||||||
columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
|
columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
|
||||||
columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother)
|
columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother)
|
||||||
ind_args = np.arange(len(y_type_2))
|
ind_args = np.arange(len(y_type_2))
|
||||||
x_end_with_child_without_mother = np.array(x_end_with_child_without_mother)
|
x_end_with_child_without_mother = np.array(x_end_with_child_without_mother, int)
|
||||||
x_start_with_child_without_mother = np.array(x_start_with_child_without_mother)
|
x_start_with_child_without_mother = np.array(x_start_with_child_without_mother, int)
|
||||||
for i_s_nc in columns_not_covered_child_no_mother:
|
for i_s_nc in columns_not_covered_child_no_mother:
|
||||||
if i_s_nc in x_start_with_child_without_mother:
|
if i_s_nc in x_start_with_child_without_mother:
|
||||||
x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
|
x_end_biggest_column = x_end_with_child_without_mother[x_start_with_child_without_mother==i_s_nc][0]
|
||||||
|
@@ -1951,7 +1935,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
for i_c in range(len(y_column_nc)):
|
for i_c in range(len(y_column_nc)):
|
||||||
if i_c==(len(y_column_nc)-1):
|
if i_c==(len(y_column_nc)-1):
|
||||||
ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) &
|
ind_all_lines_between_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) &
|
||||||
(y_type_2<int(splitter_y_new[i+1])) &
|
(y_type_2<splitter_y_new[i+1]) &
|
||||||
(x_starting>=i_s_nc) &
|
(x_starting>=i_s_nc) &
|
||||||
(x_ending<=x_end_biggest_column)]
|
(x_ending<=x_end_biggest_column)]
|
||||||
else:
|
else:
|
||||||
|
@@ -1967,21 +1951,19 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
if len(x_diff_all_between_nm_wc)>0:
|
if len(x_diff_all_between_nm_wc)>0:
|
||||||
biggest=np.argmax(x_diff_all_between_nm_wc)
|
biggest=np.argmax(x_diff_all_between_nm_wc)
|
||||||
|
|
||||||
columns_covered_by_mothers = []
|
columns_covered_by_mothers = set()
|
||||||
for dj in range(len(x_starting_all_between_nm_wc)):
|
for dj in range(len(x_starting_all_between_nm_wc)):
|
||||||
columns_covered_by_mothers = columns_covered_by_mothers + \
|
columns_covered_by_mothers.update(
|
||||||
list(range(x_starting_all_between_nm_wc[dj],
|
range(x_starting_all_between_nm_wc[dj],
|
||||||
x_ending_all_between_nm_wc[dj]))
|
x_ending_all_between_nm_wc[dj]))
|
||||||
columns_covered_by_mothers = list(set(columns_covered_by_mothers))
|
child_columns = set(range(i_s_nc, x_end_biggest_column))
|
||||||
|
columns_not_covered = list(child_columns - columns_covered_by_mothers)
|
||||||
all_columns=np.arange(i_s_nc, x_end_biggest_column)
|
|
||||||
columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
|
|
||||||
|
|
||||||
should_longest_line_be_extended=0
|
should_longest_line_be_extended=0
|
||||||
if (len(x_diff_all_between_nm_wc) > 0 and
|
if (len(x_diff_all_between_nm_wc) > 0 and
|
||||||
set(list(range(x_starting_all_between_nm_wc[biggest],
|
set(list(range(x_starting_all_between_nm_wc[biggest],
|
||||||
x_ending_all_between_nm_wc[biggest])) +
|
x_ending_all_between_nm_wc[biggest])) +
|
||||||
list(columns_not_covered)) != set(all_columns)):
|
list(columns_not_covered)) != child_columns):
|
||||||
should_longest_line_be_extended=1
|
should_longest_line_be_extended=1
|
||||||
index_lines_so_close_to_top_separator = \
|
index_lines_so_close_to_top_separator = \
|
||||||
np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) &
|
np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) &
|
||||||
|
@@ -2008,8 +1990,8 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
pass
|
pass
|
||||||
|
|
||||||
y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
|
y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
|
||||||
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, columns_not_covered)
|
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
|
||||||
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered) + 1)
|
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
|
||||||
|
|
||||||
ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
|
ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
|
||||||
for column in range(i_s_nc, x_end_biggest_column):
|
for column in range(i_s_nc, x_end_biggest_column):
|
||||||
|
@@ -2078,7 +2060,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
if len(y_in_cols)>0:
|
if len(y_in_cols)>0:
|
||||||
y_down=np.min(y_in_cols)
|
y_down=np.min(y_in_cols)
|
||||||
else:
|
else:
|
||||||
y_down=[int(splitter_y_new[i+1])][0]
|
y_down=splitter_y_new[i+1]
|
||||||
#print(y_itself,'y_itself')
|
#print(y_itself,'y_itself')
|
||||||
boxes.append([peaks_neg_tot[column],
|
boxes.append([peaks_neg_tot[column],
|
||||||
peaks_neg_tot[column+1],
|
peaks_neg_tot[column+1],
|
||||||
|
@@ -2086,45 +2068,42 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
y_down])
|
y_down])
|
||||||
except:
|
except:
|
||||||
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
|
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
|
||||||
int(splitter_y_new[i]), int(splitter_y_new[i+1])])
|
splitter_y_new[i], splitter_y_new[i+1]])
|
||||||
else:
|
else:
|
||||||
y_lines_by_order=[]
|
y_lines_by_order=[]
|
||||||
x_start_by_order=[]
|
x_start_by_order=[]
|
||||||
x_end_by_order=[]
|
x_end_by_order=[]
|
||||||
if len(x_starting)>0:
|
if len(x_starting)>0:
|
||||||
all_columns = np.arange(len(peaks_neg_tot)-1)
|
columns_covered_by_lines_covered_more_than_2col = set()
|
||||||
columns_covered_by_lines_covered_more_than_2col = []
|
|
||||||
for dj in range(len(x_starting)):
|
for dj in range(len(x_starting)):
|
||||||
if set(list(range(x_starting[dj],x_ending[dj]))) == set(all_columns):
|
if set(range(x_starting[dj], x_ending[dj])) != all_columns:
|
||||||
pass
|
columns_covered_by_lines_covered_more_than_2col.update(
|
||||||
else:
|
range(x_starting[dj], x_ending[dj]))
|
||||||
columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \
|
columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col)
|
||||||
list(range(x_starting[dj],x_ending[dj]))
|
|
||||||
columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col))
|
|
||||||
columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col))
|
|
||||||
|
|
||||||
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
|
y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered) + 1,
|
||||||
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
dtype=int) * splitter_y_new[i])
|
||||||
|
##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
|
||||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||||
x_starting = np.append(x_starting, columns_not_covered)
|
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
|
||||||
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
|
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
|
||||||
if len(new_main_sep_y) > 0:
|
if len(new_main_sep_y) > 0:
|
||||||
x_starting = np.append(x_starting, 0)
|
x_starting = np.append(x_starting, 0)
|
||||||
x_ending = np.append(x_ending, len(peaks_neg_tot)-1)
|
x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
|
||||||
else:
|
else:
|
||||||
x_starting = np.append(x_starting, x_starting[0])
|
x_starting = np.append(x_starting, x_starting[0])
|
||||||
x_ending = np.append(x_ending, x_ending[0])
|
x_ending = np.append(x_ending, x_ending[0])
|
||||||
else:
|
else:
|
||||||
all_columns = np.arange(len(peaks_neg_tot)-1)
|
columns_not_covered = list(all_columns)
|
||||||
columns_not_covered = list(set(all_columns))
|
y_type_2 = np.append(y_type_2, np.ones(len(columns_not_covered),
|
||||||
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
|
dtype=int) * splitter_y_new[i])
|
||||||
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
|
##y_lines_by_order = np.append(y_lines_by_order, [splitter_y_new[i]] * len(columns_not_covered))
|
||||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||||
x_starting = np.append(x_starting, columns_not_covered)
|
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
|
||||||
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
|
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
|
||||||
|
|
||||||
|
ind_args = np.arange(len(y_type_2))
|
||||||
|
|
||||||
ind_args=np.array(range(len(y_type_2)))
|
|
||||||
#ind_args=np.array(ind_args)
|
|
||||||
for column in range(len(peaks_neg_tot)-1):
|
for column in range(len(peaks_neg_tot)-1):
|
||||||
#print(column,'column')
|
#print(column,'column')
|
||||||
ind_args_in_col=ind_args[x_starting==column]
|
ind_args_in_col=ind_args[x_starting==column]
|
||||||
|
@@ -2155,7 +2134,6 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
x_start_itself=x_start_copy.pop(il)
|
x_start_itself=x_start_copy.pop(il)
|
||||||
x_end_itself=x_end_copy.pop(il)
|
x_end_itself=x_end_copy.pop(il)
|
||||||
|
|
||||||
#print(y_copy,'y_copy2')
|
|
||||||
for column in range(x_start_itself, x_end_itself+1):
|
for column in range(x_start_itself, x_end_itself+1):
|
||||||
#print(column,'cols')
|
#print(column,'cols')
|
||||||
y_in_cols=[]
|
y_in_cols=[]
|
||||||
|
@@ -2170,7 +2148,7 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
if len(y_in_cols)>0:
|
if len(y_in_cols)>0:
|
||||||
y_down=np.min(y_in_cols)
|
y_down=np.min(y_in_cols)
|
||||||
else:
|
else:
|
||||||
y_down=[int(splitter_y_new[i+1])][0]
|
y_down=splitter_y_new[i+1]
|
||||||
#print(y_itself,'y_itself')
|
#print(y_itself,'y_itself')
|
||||||
boxes.append([peaks_neg_tot[column],
|
boxes.append([peaks_neg_tot[column],
|
||||||
peaks_neg_tot[column+1],
|
peaks_neg_tot[column+1],
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue