mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-27 07:44:12 +01:00
return_boxes_of_images_by_order_of_reading_new: indent
(by removing unnecessary conditional)
This commit is contained in:
parent
66a0e55e49
commit
3ebbc2d693
1 changed files with 421 additions and 422 deletions
|
|
@ -1641,241 +1641,204 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
#if (len(matrix_new[:,9][matrix_new[:,9]==1]) > 0 and
|
#if (len(matrix_new[:,9][matrix_new[:,9]==1]) > 0 and
|
||||||
# np.max(matrix_new[:,8][matrix_new[:,9]==1]) >=
|
# np.max(matrix_new[:,8][matrix_new[:,9]==1]) >=
|
||||||
# 0.1 * (np.abs(bot-top))):
|
# 0.1 * (np.abs(bot-top))):
|
||||||
if True:
|
try:
|
||||||
try:
|
num_col, peaks_neg_fin = find_num_col(
|
||||||
num_col, peaks_neg_fin = find_num_col(
|
regions_without_separators[top:bot],
|
||||||
regions_without_separators[top:bot],
|
# we do not expect to get all columns in small parts (headings etc.):
|
||||||
# we do not expect to get all columns in small parts (headings etc.):
|
num_col_classifier if bot - top >= big_part else 1,
|
||||||
num_col_classifier if bot - top >= big_part else 1,
|
tables, multiplier=6. if erosion_hurts else 7.)
|
||||||
tables, multiplier=6. if erosion_hurts else 7.)
|
except:
|
||||||
except:
|
peaks_neg_fin=[]
|
||||||
peaks_neg_fin=[]
|
num_col = 0
|
||||||
num_col = 0
|
try:
|
||||||
try:
|
if ((len(peaks_neg_fin) + 1 < num_col_classifier or
|
||||||
if ((len(peaks_neg_fin) + 1 < num_col_classifier or
|
num_col_classifier == 6) and
|
||||||
num_col_classifier == 6) and
|
# we do not expect to get all columns in small parts (headings etc.):
|
||||||
# we do not expect to get all columns in small parts (headings etc.):
|
bot - top >= big_part):
|
||||||
bot - top >= big_part):
|
# found too few columns here
|
||||||
# found too few columns here
|
#print('burda')
|
||||||
#print('burda')
|
peaks_neg_fin_org = np.copy(peaks_neg_fin)
|
||||||
peaks_neg_fin_org = np.copy(peaks_neg_fin)
|
#print("peaks_neg_fin_org", peaks_neg_fin_org)
|
||||||
#print("peaks_neg_fin_org", peaks_neg_fin_org)
|
if len(peaks_neg_fin)==0:
|
||||||
if len(peaks_neg_fin)==0:
|
num_col, peaks_neg_fin = find_num_col(
|
||||||
num_col, peaks_neg_fin = find_num_col(
|
regions_without_separators[top:bot],
|
||||||
regions_without_separators[top:bot],
|
num_col_classifier, tables, multiplier=3.)
|
||||||
num_col_classifier, tables, multiplier=3.)
|
#print(peaks_neg_fin,'peaks_neg_fin')
|
||||||
#print(peaks_neg_fin,'peaks_neg_fin')
|
peaks_neg_fin_early = [0] + peaks_neg_fin + [width_tot-1]
|
||||||
peaks_neg_fin_early = [0] + peaks_neg_fin + [width_tot-1]
|
|
||||||
|
|
||||||
#print(peaks_neg_fin_early,'burda2')
|
#print(peaks_neg_fin_early,'burda2')
|
||||||
peaks_neg_fin_rev=[]
|
peaks_neg_fin_rev=[]
|
||||||
for left, right in pairwise(peaks_neg_fin_early):
|
for left, right in pairwise(peaks_neg_fin_early):
|
||||||
# print("%d:%d" % (left, right), 'i_n')
|
# print("%d:%d" % (left, right), 'i_n')
|
||||||
# dbg_plt([left, right, top, bot],
|
# dbg_plt([left, right, top, bot],
|
||||||
# "image cut for y split %d:%d / x gap %d:%d" % (
|
# "image cut for y split %d:%d / x gap %d:%d" % (
|
||||||
# top, bot, left, right))
|
# top, bot, left, right))
|
||||||
# plt.plot(regions_without_separators[top:bot, left:right].sum(axis=0))
|
# plt.plot(regions_without_separators[top:bot, left:right].sum(axis=0))
|
||||||
# plt.title("vertical projection (sum over y)")
|
# plt.title("vertical projection (sum over y)")
|
||||||
# plt.show()
|
# plt.show()
|
||||||
try:
|
try:
|
||||||
_, peaks_neg_fin1 = find_num_col(
|
_, peaks_neg_fin1 = find_num_col(
|
||||||
regions_without_separators[top:bot, left:right],
|
regions_without_separators[top:bot, left:right],
|
||||||
num_col_classifier, tables, multiplier=7.)
|
num_col_classifier, tables, multiplier=7.)
|
||||||
except:
|
except:
|
||||||
peaks_neg_fin1 = []
|
peaks_neg_fin1 = []
|
||||||
try:
|
try:
|
||||||
_, peaks_neg_fin2 = find_num_col(
|
_, peaks_neg_fin2 = find_num_col(
|
||||||
regions_without_separators[top:bot, left:right],
|
regions_without_separators[top:bot, left:right],
|
||||||
num_col_classifier, tables, multiplier=5.)
|
num_col_classifier, tables, multiplier=5.)
|
||||||
except:
|
except:
|
||||||
peaks_neg_fin2 = []
|
peaks_neg_fin2 = []
|
||||||
if len(peaks_neg_fin1) >= len(peaks_neg_fin2):
|
if len(peaks_neg_fin1) >= len(peaks_neg_fin2):
|
||||||
peaks_neg_fin = peaks_neg_fin1
|
peaks_neg_fin = peaks_neg_fin1
|
||||||
else:
|
|
||||||
peaks_neg_fin = peaks_neg_fin2
|
|
||||||
# add offset to local result
|
|
||||||
peaks_neg_fin = list(np.array(peaks_neg_fin) + left)
|
|
||||||
#print(peaks_neg_fin,'peaks_neg_fin')
|
|
||||||
|
|
||||||
peaks_neg_fin_rev.extend(peaks_neg_fin)
|
|
||||||
if right < peaks_neg_fin_early[-1]:
|
|
||||||
# all but the last column: interject the preexisting boundary
|
|
||||||
peaks_neg_fin_rev.append(right)
|
|
||||||
#print(peaks_neg_fin_rev,'peaks_neg_fin_rev')
|
|
||||||
|
|
||||||
if len(peaks_neg_fin_rev) >= len(peaks_neg_fin_org):
|
|
||||||
peaks_neg_fin = peaks_neg_fin_rev
|
|
||||||
else:
|
else:
|
||||||
peaks_neg_fin = peaks_neg_fin_org
|
peaks_neg_fin = peaks_neg_fin2
|
||||||
num_col = len(peaks_neg_fin)
|
# add offset to local result
|
||||||
|
peaks_neg_fin = list(np.array(peaks_neg_fin) + left)
|
||||||
#print(peaks_neg_fin,'peaks_neg_fin')
|
#print(peaks_neg_fin,'peaks_neg_fin')
|
||||||
except:
|
|
||||||
logger.exception("cannot find peaks consistent with columns")
|
|
||||||
#num_col, peaks_neg_fin = find_num_col(
|
|
||||||
# regions_without_separators[top:bot,:],
|
|
||||||
# multiplier=7.0)
|
|
||||||
x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
|
|
||||||
x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
|
|
||||||
cy_hor_some=matrix_new[:,5][ (matrix_new[:,9]==0) ]
|
|
||||||
y_max_hor_some=matrix_new[:,7][ (matrix_new[:,9]==0) ]
|
|
||||||
|
|
||||||
if right2left_readingorder:
|
peaks_neg_fin_rev.extend(peaks_neg_fin)
|
||||||
x_max_hor_some_new = width_tot - x_min_hor_some
|
if right < peaks_neg_fin_early[-1]:
|
||||||
x_min_hor_some_new = width_tot - x_max_hor_some
|
# all but the last column: interject the preexisting boundary
|
||||||
x_min_hor_some =list(np.copy(x_min_hor_some_new))
|
peaks_neg_fin_rev.append(right)
|
||||||
x_max_hor_some =list(np.copy(x_max_hor_some_new))
|
#print(peaks_neg_fin_rev,'peaks_neg_fin_rev')
|
||||||
|
|
||||||
peaks_neg_tot = np.array([0] + peaks_neg_fin + [width_tot])
|
if len(peaks_neg_fin_rev) >= len(peaks_neg_fin_org):
|
||||||
#print(peaks_neg_tot,'peaks_neg_tot')
|
peaks_neg_fin = peaks_neg_fin_rev
|
||||||
peaks_neg_tot_tables.append(peaks_neg_tot)
|
else:
|
||||||
|
peaks_neg_fin = peaks_neg_fin_org
|
||||||
|
num_col = len(peaks_neg_fin)
|
||||||
|
#print(peaks_neg_fin,'peaks_neg_fin')
|
||||||
|
except:
|
||||||
|
logger.exception("cannot find peaks consistent with columns")
|
||||||
|
#num_col, peaks_neg_fin = find_num_col(
|
||||||
|
# regions_without_separators[top:bot,:],
|
||||||
|
# multiplier=7.0)
|
||||||
|
x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
|
||||||
|
x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
|
||||||
|
cy_hor_some=matrix_new[:,5][ (matrix_new[:,9]==0) ]
|
||||||
|
y_max_hor_some=matrix_new[:,7][ (matrix_new[:,9]==0) ]
|
||||||
|
|
||||||
all_columns = set(range(len(peaks_neg_tot) - 1))
|
if right2left_readingorder:
|
||||||
#print("all_columns", all_columns)
|
x_max_hor_some_new = width_tot - x_min_hor_some
|
||||||
|
x_min_hor_some_new = width_tot - x_max_hor_some
|
||||||
|
x_min_hor_some =list(np.copy(x_min_hor_some_new))
|
||||||
|
x_max_hor_some =list(np.copy(x_max_hor_some_new))
|
||||||
|
|
||||||
reading_order_type, x_starting, x_ending, y_mid, y_max, \
|
peaks_neg_tot = np.array([0] + peaks_neg_fin + [width_tot])
|
||||||
y_mid_without_mother, x_start_without_mother, x_end_without_mother, \
|
#print(peaks_neg_tot,'peaks_neg_tot')
|
||||||
there_is_sep_with_child, \
|
peaks_neg_tot_tables.append(peaks_neg_tot)
|
||||||
y_mid_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
|
|
||||||
new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
|
|
||||||
x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, y_max_hor_some)
|
|
||||||
|
|
||||||
# show multi-column separators
|
all_columns = set(range(len(peaks_neg_tot) - 1))
|
||||||
# dbg_plt([0, None, top, bot], "multi-column separators in current split",
|
#print("all_columns", all_columns)
|
||||||
# list(zip(peaks_neg_tot[x_starting], peaks_neg_tot[x_ending],
|
|
||||||
# y_mid - top, y_max - top)), True)
|
|
||||||
|
|
||||||
if (reading_order_type == 1 or
|
reading_order_type, x_starting, x_ending, y_mid, y_max, \
|
||||||
len(y_mid_without_mother) >= 2 or
|
y_mid_without_mother, x_start_without_mother, x_end_without_mother, \
|
||||||
there_is_sep_with_child == 1):
|
there_is_sep_with_child, \
|
||||||
# there are top-level multi-colspan horizontal separators which overlap each other
|
y_mid_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
|
||||||
# or multiple top-level multi-colspan horizontal separators
|
new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
|
||||||
# or multi-colspan horizontal separators shorter than their respective top-level:
|
x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, y_max_hor_some)
|
||||||
# todo: explain how this is dealt with
|
|
||||||
try:
|
|
||||||
y_grenze = top + 300
|
|
||||||
up = (y_mid > top) & (y_mid <= y_grenze)
|
|
||||||
|
|
||||||
args_early_ys=np.arange(len(y_mid))
|
# show multi-column separators
|
||||||
#print(args_early_ys,'args_early_ys')
|
# dbg_plt([0, None, top, bot], "multi-column separators in current split",
|
||||||
#print(y_mid,'y_mid')
|
# list(zip(peaks_neg_tot[x_starting], peaks_neg_tot[x_ending],
|
||||||
|
# y_mid - top, y_max - top)), True)
|
||||||
|
|
||||||
x_starting_up = x_starting[up]
|
if (reading_order_type == 1 or
|
||||||
x_ending_up = x_ending[up]
|
len(y_mid_without_mother) >= 2 or
|
||||||
y_mid_up = y_mid[up]
|
there_is_sep_with_child == 1):
|
||||||
y_max_up = y_max[up]
|
# there are top-level multi-colspan horizontal separators which overlap each other
|
||||||
args_up = args_early_ys[up]
|
# or multiple top-level multi-colspan horizontal separators
|
||||||
#print(args_up,'args_up')
|
# or multi-colspan horizontal separators shorter than their respective top-level:
|
||||||
#print(y_mid_up,'y_mid_up')
|
# todo: explain how this is dealt with
|
||||||
#check if there is a big separator in this y_mains0
|
try:
|
||||||
if len(y_mid_up) > 0:
|
y_grenze = top + 300
|
||||||
# is there a separator with full-width span?
|
up = (y_mid > top) & (y_mid <= y_grenze)
|
||||||
main_separator = (x_starting_up == 0) & (x_ending_up == len(peaks_neg_tot) - 1)
|
|
||||||
y_mid_main_separator_up = y_mid_up[main_separator]
|
|
||||||
y_max_main_separator_up = y_max_up[main_separator]
|
|
||||||
args_main_to_deleted = args_up[main_separator]
|
|
||||||
#print(y_mid_main_separator_up,y_max_main_separator_up,args_main_to_deleted,'fffffjammmm')
|
|
||||||
if len(y_max_main_separator_up):
|
|
||||||
args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
|
|
||||||
#print(args_to_be_kept,'args_to_be_kept')
|
|
||||||
boxes.append([0, peaks_neg_tot[-1],
|
|
||||||
top, y_max_main_separator_up.max()])
|
|
||||||
# dbg_plt(boxes[-1], "near top main separator box")
|
|
||||||
top = y_max_main_separator_up.max()
|
|
||||||
|
|
||||||
#print(top,'top')
|
args_early_ys=np.arange(len(y_mid))
|
||||||
y_mid = y_mid[args_to_be_kept]
|
#print(args_early_ys,'args_early_ys')
|
||||||
x_starting = x_starting[args_to_be_kept]
|
#print(y_mid,'y_mid')
|
||||||
x_ending = x_ending[args_to_be_kept]
|
|
||||||
y_max = y_max[args_to_be_kept]
|
|
||||||
|
|
||||||
#print('galdiha')
|
x_starting_up = x_starting[up]
|
||||||
y_grenze = top + 200
|
x_ending_up = x_ending[up]
|
||||||
up = (y_mid > top) & (y_mid <= y_grenze)
|
y_mid_up = y_mid[up]
|
||||||
args_early_ys2 = np.arange(len(y_mid))
|
y_max_up = y_max[up]
|
||||||
x_starting_up = x_starting[up]
|
args_up = args_early_ys[up]
|
||||||
x_ending_up = x_ending[up]
|
#print(args_up,'args_up')
|
||||||
y_mid_up = y_mid[up]
|
#print(y_mid_up,'y_mid_up')
|
||||||
y_max_up = y_max[up]
|
#check if there is a big separator in this y_mains0
|
||||||
args_up2 = args_early_ys2[up]
|
if len(y_mid_up) > 0:
|
||||||
#print(y_mid_up,x_starting_up,x_ending_up,'didid')
|
# is there a separator with full-width span?
|
||||||
else:
|
main_separator = (x_starting_up == 0) & (x_ending_up == len(peaks_neg_tot) - 1)
|
||||||
args_early_ys2 = args_early_ys
|
y_mid_main_separator_up = y_mid_up[main_separator]
|
||||||
args_up2 = args_up
|
y_max_main_separator_up = y_max_up[main_separator]
|
||||||
|
args_main_to_deleted = args_up[main_separator]
|
||||||
|
#print(y_mid_main_separator_up,y_max_main_separator_up,args_main_to_deleted,'fffffjammmm')
|
||||||
|
if len(y_max_main_separator_up):
|
||||||
|
args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
|
||||||
|
#print(args_to_be_kept,'args_to_be_kept')
|
||||||
|
boxes.append([0, peaks_neg_tot[-1],
|
||||||
|
top, y_max_main_separator_up.max()])
|
||||||
|
# dbg_plt(boxes[-1], "near top main separator box")
|
||||||
|
top = y_max_main_separator_up.max()
|
||||||
|
|
||||||
nodes_in = set()
|
#print(top,'top')
|
||||||
for ij in range(len(x_starting_up)):
|
y_mid = y_mid[args_to_be_kept]
|
||||||
nodes_in.update(range(x_starting_up[ij],
|
x_starting = x_starting[args_to_be_kept]
|
||||||
x_ending_up[ij]))
|
x_ending = x_ending[args_to_be_kept]
|
||||||
#print(nodes_in,'nodes_in')
|
y_max = y_max[args_to_be_kept]
|
||||||
#print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
|
|
||||||
|
|
||||||
if nodes_in == set(range(len(peaks_neg_tot)-1)):
|
#print('galdiha')
|
||||||
pass
|
y_grenze = top + 200
|
||||||
elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
|
up = (y_mid > top) & (y_mid <= y_grenze)
|
||||||
pass
|
args_early_ys2 = np.arange(len(y_mid))
|
||||||
else:
|
x_starting_up = x_starting[up]
|
||||||
#print('burdaydikh')
|
x_ending_up = x_ending[up]
|
||||||
args_to_be_kept2 = np.array(list( set(args_early_ys2) - set(args_up2) ))
|
y_mid_up = y_mid[up]
|
||||||
|
y_max_up = y_max[up]
|
||||||
if len(args_to_be_kept2):
|
args_up2 = args_early_ys2[up]
|
||||||
#print(args_to_be_kept2, "args_to_be_kept2")
|
#print(y_mid_up,x_starting_up,x_ending_up,'didid')
|
||||||
y_mid = y_mid[args_to_be_kept2]
|
|
||||||
x_starting = x_starting[args_to_be_kept2]
|
|
||||||
x_ending = x_ending[args_to_be_kept2]
|
|
||||||
y_max = y_max[args_to_be_kept2]
|
|
||||||
|
|
||||||
#int(top)
|
|
||||||
# order multi-column separators
|
|
||||||
y_mid_by_order=[]
|
|
||||||
x_start_by_order=[]
|
|
||||||
x_end_by_order=[]
|
|
||||||
if (reading_order_type == 1 or
|
|
||||||
len(x_end_with_child_without_mother) == 0):
|
|
||||||
if reading_order_type == 1:
|
|
||||||
# there are top-level multi-colspan horizontal separators which overlap each other
|
|
||||||
#print("adding all columns at top because of multiple overlapping mothers")
|
|
||||||
y_mid_by_order.append(top)
|
|
||||||
x_start_by_order.append(0)
|
|
||||||
x_end_by_order.append(len(peaks_neg_tot)-2)
|
|
||||||
else:
|
|
||||||
# there are no top-level multi-colspan horizontal separators which themselves
|
|
||||||
# contain shorter multi-colspan separators
|
|
||||||
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
|
|
||||||
columns_covered_by_mothers = set()
|
|
||||||
for dj in range(len(x_start_without_mother)):
|
|
||||||
columns_covered_by_mothers.update(
|
|
||||||
range(x_start_without_mother[dj],
|
|
||||||
x_end_without_mother[dj]))
|
|
||||||
columns_not_covered = list(all_columns - columns_covered_by_mothers)
|
|
||||||
#print(columns_covered_by_mothers, "columns_covered_by_mothers")
|
|
||||||
#print(columns_not_covered, "columns_not_covered")
|
|
||||||
y_mid = np.append(y_mid, np.ones(len(columns_not_covered) +
|
|
||||||
len(x_start_without_mother),
|
|
||||||
dtype=int) * top)
|
|
||||||
##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
|
|
||||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
|
||||||
x_starting = np.append(x_starting, np.array(columns_not_covered, int))
|
|
||||||
x_starting = np.append(x_starting, x_start_without_mother)
|
|
||||||
x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
|
|
||||||
x_ending = np.append(x_ending, x_end_without_mother)
|
|
||||||
|
|
||||||
ind_args=np.arange(len(y_mid))
|
|
||||||
#print(ind_args,'ind_args')
|
|
||||||
for column in range(len(peaks_neg_tot)-1):
|
|
||||||
#print(column,'column')
|
|
||||||
ind_args_in_col=ind_args[x_starting==column]
|
|
||||||
#print('babali2')
|
|
||||||
#print(ind_args_in_col,'ind_args_in_col')
|
|
||||||
#print(len(y_mid))
|
|
||||||
y_mid_column=y_mid[ind_args_in_col]
|
|
||||||
x_start_column=x_starting[ind_args_in_col]
|
|
||||||
x_end_column=x_ending[ind_args_in_col]
|
|
||||||
#print('babali3')
|
|
||||||
ind_args_col_sorted=np.argsort(y_mid_column)
|
|
||||||
y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
|
|
||||||
x_start_by_order.extend(x_start_column[ind_args_col_sorted])
|
|
||||||
x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
|
|
||||||
else:
|
else:
|
||||||
|
args_early_ys2 = args_early_ys
|
||||||
|
args_up2 = args_up
|
||||||
|
|
||||||
|
nodes_in = set()
|
||||||
|
for ij in range(len(x_starting_up)):
|
||||||
|
nodes_in.update(range(x_starting_up[ij],
|
||||||
|
x_ending_up[ij]))
|
||||||
|
#print(nodes_in,'nodes_in')
|
||||||
|
#print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
|
||||||
|
|
||||||
|
if nodes_in == set(range(len(peaks_neg_tot)-1)):
|
||||||
|
pass
|
||||||
|
elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
#print('burdaydikh')
|
||||||
|
args_to_be_kept2 = np.array(list( set(args_early_ys2) - set(args_up2) ))
|
||||||
|
|
||||||
|
if len(args_to_be_kept2):
|
||||||
|
#print(args_to_be_kept2, "args_to_be_kept2")
|
||||||
|
y_mid = y_mid[args_to_be_kept2]
|
||||||
|
x_starting = x_starting[args_to_be_kept2]
|
||||||
|
x_ending = x_ending[args_to_be_kept2]
|
||||||
|
y_max = y_max[args_to_be_kept2]
|
||||||
|
|
||||||
|
#int(top)
|
||||||
|
# order multi-column separators
|
||||||
|
y_mid_by_order=[]
|
||||||
|
x_start_by_order=[]
|
||||||
|
x_end_by_order=[]
|
||||||
|
if (reading_order_type == 1 or
|
||||||
|
len(x_end_with_child_without_mother) == 0):
|
||||||
|
if reading_order_type == 1:
|
||||||
|
# there are top-level multi-colspan horizontal separators which overlap each other
|
||||||
|
#print("adding all columns at top because of multiple overlapping mothers")
|
||||||
|
y_mid_by_order.append(top)
|
||||||
|
x_start_by_order.append(0)
|
||||||
|
x_end_by_order.append(len(peaks_neg_tot)-2)
|
||||||
|
else:
|
||||||
|
# there are no top-level multi-colspan horizontal separators which themselves
|
||||||
|
# contain shorter multi-colspan separators
|
||||||
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
|
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
|
||||||
columns_covered_by_mothers = set()
|
columns_covered_by_mothers = set()
|
||||||
for dj in range(len(x_start_without_mother)):
|
for dj in range(len(x_start_without_mother)):
|
||||||
|
|
@ -1895,212 +1858,170 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
|
x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
|
||||||
x_ending = np.append(x_ending, x_end_without_mother)
|
x_ending = np.append(x_ending, x_end_without_mother)
|
||||||
|
|
||||||
columns_covered_by_mothers_with_child = set()
|
ind_args=np.arange(len(y_mid))
|
||||||
for dj in range(len(x_end_with_child_without_mother)):
|
#print(ind_args,'ind_args')
|
||||||
columns_covered_by_mothers_with_child.update(
|
for column in range(len(peaks_neg_tot)-1):
|
||||||
range(x_start_with_child_without_mother[dj],
|
#print(column,'column')
|
||||||
x_end_with_child_without_mother[dj]))
|
ind_args_in_col=ind_args[x_starting==column]
|
||||||
#print(columns_covered_by_mothers_with_child, "columns_covered_by_mothers_with_child")
|
#print('babali2')
|
||||||
columns_not_covered_by_mothers_with_child = list(
|
#print(ind_args_in_col,'ind_args_in_col')
|
||||||
all_columns - columns_covered_by_mothers_with_child)
|
#print(len(y_mid))
|
||||||
#indexes_to_be_spanned=[]
|
y_mid_column=y_mid[ind_args_in_col]
|
||||||
for i_s in range(len(x_end_with_child_without_mother)):
|
x_start_column=x_starting[ind_args_in_col]
|
||||||
columns_not_covered_by_mothers_with_child.append(x_start_with_child_without_mother[i_s])
|
x_end_column=x_ending[ind_args_in_col]
|
||||||
columns_not_covered_by_mothers_with_child = np.sort(columns_not_covered_by_mothers_with_child)
|
#print('babali3')
|
||||||
#print(columns_not_covered_by_mothers_with_child, "columns_not_covered_by_mothers_with_child")
|
ind_args_col_sorted=np.argsort(y_mid_column)
|
||||||
ind_args = np.arange(len(y_mid))
|
y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
|
||||||
for i_s_nc in columns_not_covered_by_mothers_with_child:
|
x_start_by_order.extend(x_start_column[ind_args_col_sorted])
|
||||||
if i_s_nc in x_start_with_child_without_mother:
|
x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
|
||||||
# use only seps with mother's span ("biggest")
|
|
||||||
#print("i_s_nc", i_s_nc)
|
|
||||||
x_end_biggest_column = \
|
|
||||||
x_end_with_child_without_mother[
|
|
||||||
x_start_with_child_without_mother == i_s_nc][0]
|
|
||||||
args_all_biggest_seps = \
|
|
||||||
ind_args[(x_starting == i_s_nc) &
|
|
||||||
(x_ending == x_end_biggest_column)]
|
|
||||||
y_mid_column_nc = y_mid[args_all_biggest_seps]
|
|
||||||
#print("%d:%d" % (i_s_nc, x_end_biggest_column), "columns covered by mother with child")
|
|
||||||
#x_start_column_nc = x_starting[args_all_biggest_seps]
|
|
||||||
#x_end_column_nc = x_ending[args_all_biggest_seps]
|
|
||||||
y_mid_column_nc = np.sort(y_mid_column_nc)
|
|
||||||
#print(y_mid_column_nc, "y_mid_column_nc (sorted)")
|
|
||||||
for nc_top, nc_bot in pairwise(np.append(y_mid_column_nc, bot)):
|
|
||||||
#print("i_c", i_c)
|
|
||||||
#print("%d:%d" % (nc_top, nc_bot), "y_mid_column_nc")
|
|
||||||
ind_all_seps_between_nm_wc = \
|
|
||||||
ind_args[(y_mid > nc_top) &
|
|
||||||
(y_mid < nc_bot) &
|
|
||||||
(x_starting >= i_s_nc) &
|
|
||||||
(x_ending <= x_end_biggest_column)]
|
|
||||||
y_mid_all_between_nm_wc = y_mid[ind_all_seps_between_nm_wc]
|
|
||||||
x_starting_all_between_nm_wc = x_starting[ind_all_seps_between_nm_wc]
|
|
||||||
x_ending_all_between_nm_wc = x_ending[ind_all_seps_between_nm_wc]
|
|
||||||
|
|
||||||
columns_covered_by_mothers = set()
|
|
||||||
for dj in range(len(ind_all_seps_between_nm_wc)):
|
|
||||||
columns_covered_by_mothers.update(
|
|
||||||
range(x_starting_all_between_nm_wc[dj],
|
|
||||||
x_ending_all_between_nm_wc[dj]))
|
|
||||||
#print(columns_covered_by_mothers, "columns_covered_by_mothers")
|
|
||||||
child_columns = set(range(i_s_nc, x_end_biggest_column))
|
|
||||||
columns_not_covered = list(child_columns - columns_covered_by_mothers)
|
|
||||||
#print(child_columns, "child_columns")
|
|
||||||
#print(columns_not_covered, "columns_not_covered")
|
|
||||||
|
|
||||||
if len(ind_all_seps_between_nm_wc):
|
|
||||||
biggest = np.argmax(x_ending_all_between_nm_wc -
|
|
||||||
x_starting_all_between_nm_wc)
|
|
||||||
#print(ind_all_seps_between_nm_wc, "ind_all_seps_between_nm_wc")
|
|
||||||
#print(biggest, "%d:%d" % (x_starting_all_between_nm_wc[biggest],
|
|
||||||
x_ending_all_between_nm_wc[biggest]), "biggest")
|
|
||||||
if columns_covered_by_mothers == set(
|
|
||||||
range(x_starting_all_between_nm_wc[biggest],
|
|
||||||
x_ending_all_between_nm_wc[biggest])):
|
|
||||||
# single biggest accounts for all covered columns alone,
|
|
||||||
# this separator should be extended to cover all
|
|
||||||
seps_too_close_to_top_separator = \
|
|
||||||
((y_mid_all_between_nm_wc > nc_top) &
|
|
||||||
(y_mid_all_between_nm_wc <= nc_top + 500))
|
|
||||||
if (np.count_nonzero(seps_too_close_to_top_separator) and
|
|
||||||
np.count_nonzero(seps_too_close_to_top_separator) <
|
|
||||||
len(ind_all_seps_between_nm_wc)):
|
|
||||||
#print(seps_too_close_to_top_separator, "seps_too_close_to_top_separator")
|
|
||||||
y_mid_all_between_nm_wc = \
|
|
||||||
y_mid_all_between_nm_wc[~seps_too_close_to_top_separator]
|
|
||||||
x_starting_all_between_nm_wc = \
|
|
||||||
x_starting_all_between_nm_wc[~seps_too_close_to_top_separator]
|
|
||||||
x_ending_all_between_nm_wc = \
|
|
||||||
x_ending_all_between_nm_wc[~seps_too_close_to_top_separator]
|
|
||||||
|
|
||||||
y_mid_all_between_nm_wc = np.append(
|
|
||||||
y_mid_all_between_nm_wc, nc_top)
|
|
||||||
x_starting_all_between_nm_wc = np.append(
|
|
||||||
x_starting_all_between_nm_wc, i_s_nc)
|
|
||||||
x_ending_all_between_nm_wc = np.append(
|
|
||||||
x_ending_all_between_nm_wc, x_end_biggest_column)
|
|
||||||
else:
|
|
||||||
y_mid_all_between_nm_wc = np.append(
|
|
||||||
y_mid_all_between_nm_wc, nc_top)
|
|
||||||
x_starting_all_between_nm_wc = np.append(
|
|
||||||
x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
|
|
||||||
x_ending_all_between_nm_wc = np.append(
|
|
||||||
x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
|
|
||||||
|
|
||||||
if len(columns_not_covered):
|
|
||||||
y_mid_all_between_nm_wc = np.append(
|
|
||||||
y_mid_all_between_nm_wc, [nc_top] * len(columns_not_covered))
|
|
||||||
x_starting_all_between_nm_wc = np.append(
|
|
||||||
x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
|
|
||||||
x_ending_all_between_nm_wc = np.append(
|
|
||||||
x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
|
|
||||||
|
|
||||||
ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
|
|
||||||
for column in range(int(i_s_nc), int(x_end_biggest_column)):
|
|
||||||
ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column]
|
|
||||||
#print('babali2')
|
|
||||||
#print(ind_args_in_col,'ind_args_in_col')
|
|
||||||
#print(len(y_mid))
|
|
||||||
y_mid_column=y_mid_all_between_nm_wc[ind_args_in_col]
|
|
||||||
x_start_column=x_starting_all_between_nm_wc[ind_args_in_col]
|
|
||||||
x_end_column=x_ending_all_between_nm_wc[ind_args_in_col]
|
|
||||||
#print('babali3')
|
|
||||||
ind_args_col_sorted=np.argsort(y_mid_column)
|
|
||||||
y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
|
|
||||||
x_start_by_order.extend(x_start_column[ind_args_col_sorted])
|
|
||||||
x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
|
|
||||||
else:
|
|
||||||
#print(i_s_nc,'column not covered by mothers with child')
|
|
||||||
ind_args_in_col=ind_args[x_starting==i_s_nc]
|
|
||||||
#print('babali2')
|
|
||||||
#print(ind_args_in_col,'ind_args_in_col')
|
|
||||||
#print(len(y_mid))
|
|
||||||
y_mid_column=y_mid[ind_args_in_col]
|
|
||||||
x_start_column=x_starting[ind_args_in_col]
|
|
||||||
x_end_column=x_ending[ind_args_in_col]
|
|
||||||
#print('babali3')
|
|
||||||
ind_args_col_sorted = np.argsort(y_mid_column)
|
|
||||||
y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
|
|
||||||
x_start_by_order.extend(x_start_column[ind_args_col_sorted])
|
|
||||||
x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
|
|
||||||
|
|
||||||
# create single-column boxes from multi-column separators
|
|
||||||
y_mid_by_order = np.array(y_mid_by_order)
|
|
||||||
x_start_by_order = np.array(x_start_by_order)
|
|
||||||
x_end_by_order = np.array(x_end_by_order)
|
|
||||||
for il in range(len(y_mid_by_order)):
|
|
||||||
#print(il, "il")
|
|
||||||
y_mid_itself = y_mid_by_order[il]
|
|
||||||
x_start_itself = x_start_by_order[il]
|
|
||||||
x_end_itself = x_end_by_order[il]
|
|
||||||
for column in range(int(x_start_itself), int(x_end_itself)+1):
|
|
||||||
#print(column,'cols')
|
|
||||||
#print('burda')
|
|
||||||
#print('burda2')
|
|
||||||
y_mid_next = y_mid_by_order[(y_mid_itself < y_mid_by_order) &
|
|
||||||
(column >= x_start_by_order) &
|
|
||||||
(column <= x_end_by_order)]
|
|
||||||
y_mid_next = y_mid_next.min(initial=bot)
|
|
||||||
#print(y_mid_next,'y_mid_next')
|
|
||||||
#print(y_mid_itself,'y_mid_itself')
|
|
||||||
boxes.append([peaks_neg_tot[column],
|
|
||||||
peaks_neg_tot[column+1],
|
|
||||||
y_mid_itself,
|
|
||||||
y_mid_next])
|
|
||||||
# dbg_plt(boxes[-1], "A column %d box" % (column + 1))
|
|
||||||
except:
|
|
||||||
logger.exception("cannot assign boxes")
|
|
||||||
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
|
|
||||||
top, bot])
|
|
||||||
# dbg_plt(boxes[-1], "fallback box")
|
|
||||||
else:
|
|
||||||
# order multi-column separators
|
|
||||||
y_mid_by_order=[]
|
|
||||||
x_start_by_order=[]
|
|
||||||
x_end_by_order=[]
|
|
||||||
if len(x_starting)>0:
|
|
||||||
columns_covered_by_seps_covered_more_than_2col = set()
|
|
||||||
for dj in range(len(x_starting)):
|
|
||||||
if set(range(x_starting[dj], x_ending[dj])) != all_columns:
|
|
||||||
columns_covered_by_seps_covered_more_than_2col.update(
|
|
||||||
range(x_starting[dj], x_ending[dj]))
|
|
||||||
columns_not_covered = list(all_columns - columns_covered_by_seps_covered_more_than_2col)
|
|
||||||
|
|
||||||
y_mid = np.append(y_mid, np.ones(len(columns_not_covered) + 1,
|
|
||||||
dtype=int) * top)
|
|
||||||
##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
|
|
||||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
|
||||||
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
|
|
||||||
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
|
|
||||||
if len(new_main_sep_y) > 0:
|
|
||||||
x_starting = np.append(x_starting, 0)
|
|
||||||
x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
|
|
||||||
else:
|
|
||||||
x_starting = np.append(x_starting, x_starting[0])
|
|
||||||
x_ending = np.append(x_ending, x_ending[0])
|
|
||||||
else:
|
else:
|
||||||
columns_not_covered = list(all_columns)
|
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
|
||||||
y_mid = np.append(y_mid, np.ones(len(columns_not_covered),
|
columns_covered_by_mothers = set()
|
||||||
|
for dj in range(len(x_start_without_mother)):
|
||||||
|
columns_covered_by_mothers.update(
|
||||||
|
range(x_start_without_mother[dj],
|
||||||
|
x_end_without_mother[dj]))
|
||||||
|
columns_not_covered = list(all_columns - columns_covered_by_mothers)
|
||||||
|
#print(columns_covered_by_mothers, "columns_covered_by_mothers")
|
||||||
|
#print(columns_not_covered, "columns_not_covered")
|
||||||
|
y_mid = np.append(y_mid, np.ones(len(columns_not_covered) +
|
||||||
|
len(x_start_without_mother),
|
||||||
dtype=int) * top)
|
dtype=int) * top)
|
||||||
##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
|
##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
|
||||||
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||||
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
|
x_starting = np.append(x_starting, np.array(columns_not_covered, int))
|
||||||
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
|
x_starting = np.append(x_starting, x_start_without_mother)
|
||||||
|
x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
|
||||||
|
x_ending = np.append(x_ending, x_end_without_mother)
|
||||||
|
|
||||||
ind_args = np.arange(len(y_mid))
|
columns_covered_by_mothers_with_child = set()
|
||||||
|
for dj in range(len(x_end_with_child_without_mother)):
|
||||||
|
columns_covered_by_mothers_with_child.update(
|
||||||
|
range(x_start_with_child_without_mother[dj],
|
||||||
|
x_end_with_child_without_mother[dj]))
|
||||||
|
#print(columns_covered_by_mothers_with_child, "columns_covered_by_mothers_with_child")
|
||||||
|
columns_not_covered_by_mothers_with_child = list(
|
||||||
|
all_columns - columns_covered_by_mothers_with_child)
|
||||||
|
#indexes_to_be_spanned=[]
|
||||||
|
for i_s in range(len(x_end_with_child_without_mother)):
|
||||||
|
columns_not_covered_by_mothers_with_child.append(x_start_with_child_without_mother[i_s])
|
||||||
|
columns_not_covered_by_mothers_with_child = np.sort(columns_not_covered_by_mothers_with_child)
|
||||||
|
#print(columns_not_covered_by_mothers_with_child, "columns_not_covered_by_mothers_with_child")
|
||||||
|
ind_args = np.arange(len(y_mid))
|
||||||
|
for i_s_nc in columns_not_covered_by_mothers_with_child:
|
||||||
|
if i_s_nc in x_start_with_child_without_mother:
|
||||||
|
# use only seps with mother's span ("biggest")
|
||||||
|
#print("i_s_nc", i_s_nc)
|
||||||
|
x_end_biggest_column = \
|
||||||
|
x_end_with_child_without_mother[
|
||||||
|
x_start_with_child_without_mother == i_s_nc][0]
|
||||||
|
args_all_biggest_seps = \
|
||||||
|
ind_args[(x_starting == i_s_nc) &
|
||||||
|
(x_ending == x_end_biggest_column)]
|
||||||
|
y_mid_column_nc = y_mid[args_all_biggest_seps]
|
||||||
|
#print("%d:%d" % (i_s_nc, x_end_biggest_column), "columns covered by mother with child")
|
||||||
|
#x_start_column_nc = x_starting[args_all_biggest_seps]
|
||||||
|
#x_end_column_nc = x_ending[args_all_biggest_seps]
|
||||||
|
y_mid_column_nc = np.sort(y_mid_column_nc)
|
||||||
|
#print(y_mid_column_nc, "y_mid_column_nc (sorted)")
|
||||||
|
for nc_top, nc_bot in pairwise(np.append(y_mid_column_nc, bot)):
|
||||||
|
#print("i_c", i_c)
|
||||||
|
#print("%d:%d" % (nc_top, nc_bot), "y_mid_column_nc")
|
||||||
|
ind_all_seps_between_nm_wc = \
|
||||||
|
ind_args[(y_mid > nc_top) &
|
||||||
|
(y_mid < nc_bot) &
|
||||||
|
(x_starting >= i_s_nc) &
|
||||||
|
(x_ending <= x_end_biggest_column)]
|
||||||
|
y_mid_all_between_nm_wc = y_mid[ind_all_seps_between_nm_wc]
|
||||||
|
x_starting_all_between_nm_wc = x_starting[ind_all_seps_between_nm_wc]
|
||||||
|
x_ending_all_between_nm_wc = x_ending[ind_all_seps_between_nm_wc]
|
||||||
|
|
||||||
for column in range(len(peaks_neg_tot)-1):
|
columns_covered_by_mothers = set()
|
||||||
#print(column,'column')
|
for dj in range(len(ind_all_seps_between_nm_wc)):
|
||||||
ind_args_in_col=ind_args[x_starting==column]
|
columns_covered_by_mothers.update(
|
||||||
#print(len(y_mid))
|
range(x_starting_all_between_nm_wc[dj],
|
||||||
y_mid_column=y_mid[ind_args_in_col]
|
x_ending_all_between_nm_wc[dj]))
|
||||||
x_start_column=x_starting[ind_args_in_col]
|
#print(columns_covered_by_mothers, "columns_covered_by_mothers")
|
||||||
x_end_column=x_ending[ind_args_in_col]
|
child_columns = set(range(i_s_nc, x_end_biggest_column))
|
||||||
|
columns_not_covered = list(child_columns - columns_covered_by_mothers)
|
||||||
|
#print(child_columns, "child_columns")
|
||||||
|
#print(columns_not_covered, "columns_not_covered")
|
||||||
|
|
||||||
ind_args_col_sorted = np.argsort(y_mid_column)
|
if len(ind_all_seps_between_nm_wc):
|
||||||
y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
|
biggest = np.argmax(x_ending_all_between_nm_wc -
|
||||||
x_start_by_order.extend(x_start_column[ind_args_col_sorted])
|
x_starting_all_between_nm_wc)
|
||||||
x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
|
#print(ind_all_seps_between_nm_wc, "ind_all_seps_between_nm_wc")
|
||||||
|
#print(biggest, "%d:%d" % (x_starting_all_between_nm_wc[biggest],
|
||||||
|
x_ending_all_between_nm_wc[biggest]), "biggest")
|
||||||
|
if columns_covered_by_mothers == set(
|
||||||
|
range(x_starting_all_between_nm_wc[biggest],
|
||||||
|
x_ending_all_between_nm_wc[biggest])):
|
||||||
|
# single biggest accounts for all covered columns alone,
|
||||||
|
# this separator should be extended to cover all
|
||||||
|
seps_too_close_to_top_separator = \
|
||||||
|
((y_mid_all_between_nm_wc > nc_top) &
|
||||||
|
(y_mid_all_between_nm_wc <= nc_top + 500))
|
||||||
|
if (np.count_nonzero(seps_too_close_to_top_separator) and
|
||||||
|
np.count_nonzero(seps_too_close_to_top_separator) <
|
||||||
|
len(ind_all_seps_between_nm_wc)):
|
||||||
|
#print(seps_too_close_to_top_separator, "seps_too_close_to_top_separator")
|
||||||
|
y_mid_all_between_nm_wc = \
|
||||||
|
y_mid_all_between_nm_wc[~seps_too_close_to_top_separator]
|
||||||
|
x_starting_all_between_nm_wc = \
|
||||||
|
x_starting_all_between_nm_wc[~seps_too_close_to_top_separator]
|
||||||
|
x_ending_all_between_nm_wc = \
|
||||||
|
x_ending_all_between_nm_wc[~seps_too_close_to_top_separator]
|
||||||
|
|
||||||
|
y_mid_all_between_nm_wc = np.append(
|
||||||
|
y_mid_all_between_nm_wc, nc_top)
|
||||||
|
x_starting_all_between_nm_wc = np.append(
|
||||||
|
x_starting_all_between_nm_wc, i_s_nc)
|
||||||
|
x_ending_all_between_nm_wc = np.append(
|
||||||
|
x_ending_all_between_nm_wc, x_end_biggest_column)
|
||||||
|
else:
|
||||||
|
y_mid_all_between_nm_wc = np.append(
|
||||||
|
y_mid_all_between_nm_wc, nc_top)
|
||||||
|
x_starting_all_between_nm_wc = np.append(
|
||||||
|
x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
|
||||||
|
x_ending_all_between_nm_wc = np.append(
|
||||||
|
x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
|
||||||
|
|
||||||
|
if len(columns_not_covered):
|
||||||
|
y_mid_all_between_nm_wc = np.append(
|
||||||
|
y_mid_all_between_nm_wc, [nc_top] * len(columns_not_covered))
|
||||||
|
x_starting_all_between_nm_wc = np.append(
|
||||||
|
x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
|
||||||
|
x_ending_all_between_nm_wc = np.append(
|
||||||
|
x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
|
||||||
|
|
||||||
|
ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
|
||||||
|
for column in range(int(i_s_nc), int(x_end_biggest_column)):
|
||||||
|
ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column]
|
||||||
|
#print('babali2')
|
||||||
|
#print(ind_args_in_col,'ind_args_in_col')
|
||||||
|
#print(len(y_mid))
|
||||||
|
y_mid_column=y_mid_all_between_nm_wc[ind_args_in_col]
|
||||||
|
x_start_column=x_starting_all_between_nm_wc[ind_args_in_col]
|
||||||
|
x_end_column=x_ending_all_between_nm_wc[ind_args_in_col]
|
||||||
|
#print('babali3')
|
||||||
|
ind_args_col_sorted=np.argsort(y_mid_column)
|
||||||
|
y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
|
||||||
|
x_start_by_order.extend(x_start_column[ind_args_col_sorted])
|
||||||
|
x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
|
||||||
|
else:
|
||||||
|
#print(i_s_nc,'column not covered by mothers with child')
|
||||||
|
ind_args_in_col=ind_args[x_starting==i_s_nc]
|
||||||
|
#print('babali2')
|
||||||
|
#print(ind_args_in_col,'ind_args_in_col')
|
||||||
|
#print(len(y_mid))
|
||||||
|
y_mid_column=y_mid[ind_args_in_col]
|
||||||
|
x_start_column=x_starting[ind_args_in_col]
|
||||||
|
x_end_column=x_ending[ind_args_in_col]
|
||||||
|
#print('babali3')
|
||||||
|
ind_args_col_sorted = np.argsort(y_mid_column)
|
||||||
|
y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
|
||||||
|
x_start_by_order.extend(x_start_column[ind_args_col_sorted])
|
||||||
|
x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
|
||||||
|
|
||||||
# create single-column boxes from multi-column separators
|
# create single-column boxes from multi-column separators
|
||||||
y_mid_by_order = np.array(y_mid_by_order)
|
y_mid_by_order = np.array(y_mid_by_order)
|
||||||
|
|
@ -2109,23 +2030,101 @@ def return_boxes_of_images_by_order_of_reading_new(
|
||||||
for il in range(len(y_mid_by_order)):
|
for il in range(len(y_mid_by_order)):
|
||||||
#print(il, "il")
|
#print(il, "il")
|
||||||
y_mid_itself = y_mid_by_order[il]
|
y_mid_itself = y_mid_by_order[il]
|
||||||
#print(y_mid_itself,'y_mid_itself')
|
|
||||||
x_start_itself = x_start_by_order[il]
|
x_start_itself = x_start_by_order[il]
|
||||||
x_end_itself = x_end_by_order[il]
|
x_end_itself = x_end_by_order[il]
|
||||||
for column in range(x_start_itself, x_end_itself+1):
|
for column in range(int(x_start_itself), int(x_end_itself)+1):
|
||||||
#print(column,'cols')
|
#print(column,'cols')
|
||||||
|
#print('burda')
|
||||||
#print('burda2')
|
#print('burda2')
|
||||||
y_mid_next = y_mid_by_order[(y_mid_itself < y_mid_by_order) &
|
y_mid_next = y_mid_by_order[(y_mid_itself < y_mid_by_order) &
|
||||||
(column >= x_start_by_order) &
|
(column >= x_start_by_order) &
|
||||||
(column <= x_end_by_order)]
|
(column <= x_end_by_order)]
|
||||||
#print(y_mid_next,'y_mid_next')
|
|
||||||
y_mid_next = y_mid_next.min(initial=bot)
|
y_mid_next = y_mid_next.min(initial=bot)
|
||||||
#print(y_mid_next,'y_mid_next')
|
#print(y_mid_next,'y_mid_next')
|
||||||
|
#print(y_mid_itself,'y_mid_itself')
|
||||||
boxes.append([peaks_neg_tot[column],
|
boxes.append([peaks_neg_tot[column],
|
||||||
peaks_neg_tot[column+1],
|
peaks_neg_tot[column+1],
|
||||||
y_mid_itself,
|
y_mid_itself,
|
||||||
y_mid_next])
|
y_mid_next])
|
||||||
# dbg_plt(boxes[-1], "B column %d box" % (column + 1))
|
# dbg_plt(boxes[-1], "A column %d box" % (column + 1))
|
||||||
|
except:
|
||||||
|
logger.exception("cannot assign boxes")
|
||||||
|
boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
|
||||||
|
top, bot])
|
||||||
|
# dbg_plt(boxes[-1], "fallback box")
|
||||||
|
else:
|
||||||
|
# order multi-column separators
|
||||||
|
y_mid_by_order=[]
|
||||||
|
x_start_by_order=[]
|
||||||
|
x_end_by_order=[]
|
||||||
|
if len(x_starting)>0:
|
||||||
|
columns_covered_by_seps_covered_more_than_2col = set()
|
||||||
|
for dj in range(len(x_starting)):
|
||||||
|
if set(range(x_starting[dj], x_ending[dj])) != all_columns:
|
||||||
|
columns_covered_by_seps_covered_more_than_2col.update(
|
||||||
|
range(x_starting[dj], x_ending[dj]))
|
||||||
|
columns_not_covered = list(all_columns - columns_covered_by_seps_covered_more_than_2col)
|
||||||
|
|
||||||
|
y_mid = np.append(y_mid, np.ones(len(columns_not_covered) + 1,
|
||||||
|
dtype=int) * top)
|
||||||
|
##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
|
||||||
|
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||||
|
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
|
||||||
|
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
|
||||||
|
if len(new_main_sep_y) > 0:
|
||||||
|
x_starting = np.append(x_starting, 0)
|
||||||
|
x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
|
||||||
|
else:
|
||||||
|
x_starting = np.append(x_starting, x_starting[0])
|
||||||
|
x_ending = np.append(x_ending, x_ending[0])
|
||||||
|
else:
|
||||||
|
columns_not_covered = list(all_columns)
|
||||||
|
y_mid = np.append(y_mid, np.ones(len(columns_not_covered),
|
||||||
|
dtype=int) * top)
|
||||||
|
##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
|
||||||
|
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
|
||||||
|
x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
|
||||||
|
x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
|
||||||
|
|
||||||
|
ind_args = np.arange(len(y_mid))
|
||||||
|
|
||||||
|
for column in range(len(peaks_neg_tot)-1):
|
||||||
|
#print(column,'column')
|
||||||
|
ind_args_in_col=ind_args[x_starting==column]
|
||||||
|
#print(len(y_mid))
|
||||||
|
y_mid_column=y_mid[ind_args_in_col]
|
||||||
|
x_start_column=x_starting[ind_args_in_col]
|
||||||
|
x_end_column=x_ending[ind_args_in_col]
|
||||||
|
|
||||||
|
ind_args_col_sorted = np.argsort(y_mid_column)
|
||||||
|
y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
|
||||||
|
x_start_by_order.extend(x_start_column[ind_args_col_sorted])
|
||||||
|
x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
|
||||||
|
|
||||||
|
# create single-column boxes from multi-column separators
|
||||||
|
y_mid_by_order = np.array(y_mid_by_order)
|
||||||
|
x_start_by_order = np.array(x_start_by_order)
|
||||||
|
x_end_by_order = np.array(x_end_by_order)
|
||||||
|
for il in range(len(y_mid_by_order)):
|
||||||
|
#print(il, "il")
|
||||||
|
y_mid_itself = y_mid_by_order[il]
|
||||||
|
#print(y_mid_itself,'y_mid_itself')
|
||||||
|
x_start_itself = x_start_by_order[il]
|
||||||
|
x_end_itself = x_end_by_order[il]
|
||||||
|
for column in range(x_start_itself, x_end_itself+1):
|
||||||
|
#print(column,'cols')
|
||||||
|
#print('burda2')
|
||||||
|
y_mid_next = y_mid_by_order[(y_mid_itself < y_mid_by_order) &
|
||||||
|
(column >= x_start_by_order) &
|
||||||
|
(column <= x_end_by_order)]
|
||||||
|
#print(y_mid_next,'y_mid_next')
|
||||||
|
y_mid_next = y_mid_next.min(initial=bot)
|
||||||
|
#print(y_mid_next,'y_mid_next')
|
||||||
|
boxes.append([peaks_neg_tot[column],
|
||||||
|
peaks_neg_tot[column+1],
|
||||||
|
y_mid_itself,
|
||||||
|
y_mid_next])
|
||||||
|
# dbg_plt(boxes[-1], "B column %d box" % (column + 1))
|
||||||
|
|
||||||
if right2left_readingorder:
|
if right2left_readingorder:
|
||||||
peaks_neg_tot_tables_new = []
|
peaks_neg_tot_tables_new = []
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue