return_boxes_of_images_by_order_of_reading_new: simplify, avoid changing dtype during np.append

This commit is contained in:
Robert Sachunsky 2025-08-19 20:09:09 +02:00
parent f994ea5f0b
commit e9a6ff5d81
2 changed files with 38 additions and 53 deletions

View file

@ -3776,7 +3776,7 @@ class Eynollah:
return all_found_textline_polygons return all_found_textline_polygons
def dilate_textregions_contours(self, all_found_textline_polygons): def dilate_textregions_contours(self, all_found_textline_polygons):
return [np.array(make_valid(Polygon(poly[:, 0])).buffer(5).exterior.coords, return [np.array(make_valid(Polygon(poly[:, 0]).buffer(5)).exterior.coords,
dtype=int)[:, np.newaxis] dtype=int)[:, np.newaxis]
for poly in all_found_textline_polygons] for poly in all_found_textline_polygons]

View file

@ -1742,6 +1742,7 @@ def return_boxes_of_images_by_order_of_reading_new(
x_ending = np.array(x_ending) x_ending = np.array(x_ending)
y_type_2 = np.array(y_type_2) y_type_2 = np.array(y_type_2)
y_diff_type_2 = np.array(y_diff_type_2) y_diff_type_2 = np.array(y_diff_type_2)
all_columns = set(range(len(peaks_neg_tot) - 1))
if ((reading_order_type==1) or if ((reading_order_type==1) or
(reading_order_type==0 and (reading_order_type==0 and
@ -1863,19 +1864,16 @@ def return_boxes_of_images_by_order_of_reading_new(
x_end_by_order.append(len(peaks_neg_tot)-2) x_end_by_order.append(len(peaks_neg_tot)-2)
else: else:
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
columns_covered_by_mothers = [] columns_covered_by_mothers = set()
for dj in range(len(x_start_without_mother)): for dj in range(len(x_start_without_mother)):
columns_covered_by_mothers = columns_covered_by_mothers + \ columns_covered_by_mothers.update(
list(range(x_start_without_mother[dj], range(x_start_without_mother[dj],
x_end_without_mother[dj])) x_end_without_mother[dj]))
columns_covered_by_mothers = list(set(columns_covered_by_mothers)) columns_not_covered = list(all_columns - columns_covered_by_mothers)
all_columns=np.arange(len(peaks_neg_tot)-1)
columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered) x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_starting = np.append(x_starting, x_start_without_mother) x_starting = np.append(x_starting, x_start_without_mother)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, np.array(columns_not_covered) + 1)
x_ending = np.append(x_ending, x_end_without_mother) x_ending = np.append(x_ending, x_end_without_mother)
@ -1906,32 +1904,26 @@ def return_boxes_of_images_by_order_of_reading_new(
x_end_by_order.append(x_end_column_sort[ii]-1) x_end_by_order.append(x_end_column_sort[ii]-1)
else: else:
#print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
columns_covered_by_mothers = [] columns_covered_by_mothers = set()
for dj in range(len(x_start_without_mother)): for dj in range(len(x_start_without_mother)):
columns_covered_by_mothers = columns_covered_by_mothers + \ columns_covered_by_mothers.update(
list(range(x_start_without_mother[dj], range(x_start_without_mother[dj],
x_end_without_mother[dj])) x_end_without_mother[dj]))
columns_covered_by_mothers = list(set(columns_covered_by_mothers)) columns_not_covered = list(all_columns - columns_covered_by_mothers)
all_columns=np.arange(len(peaks_neg_tot)-1)
columns_not_covered=list(set(all_columns) - set(columns_covered_by_mothers))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother))) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + len(x_start_without_mother)))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered) x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_starting = np.append(x_starting, x_start_without_mother) x_starting = np.append(x_starting, x_start_without_mother)
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
x_ending = np.append(x_ending, x_end_without_mother) x_ending = np.append(x_ending, x_end_without_mother)
columns_covered_by_with_child_no_mothers = [] columns_covered_by_with_child_no_mothers = set()
for dj in range(len(x_end_with_child_without_mother)): for dj in range(len(x_end_with_child_without_mother)):
columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + \ columns_covered_by_with_child_no_mothers.update(
list(range(x_start_with_child_without_mother[dj], range(x_start_with_child_without_mother[dj],
x_end_with_child_without_mother[dj])) x_end_with_child_without_mother[dj]))
columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) columns_not_covered_child_no_mother = list(all_columns - columns_covered_by_with_child_no_mothers)
all_columns = np.arange(len(peaks_neg_tot)-1)
columns_not_covered_child_no_mother = list(set(all_columns) - set(columns_covered_by_with_child_no_mothers))
#indexes_to_be_spanned=[] #indexes_to_be_spanned=[]
for i_s in range(len(x_end_with_child_without_mother)): for i_s in range(len(x_end_with_child_without_mother)):
columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s]) columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s])
@ -1967,21 +1959,19 @@ def return_boxes_of_images_by_order_of_reading_new(
if len(x_diff_all_between_nm_wc)>0: if len(x_diff_all_between_nm_wc)>0:
biggest=np.argmax(x_diff_all_between_nm_wc) biggest=np.argmax(x_diff_all_between_nm_wc)
columns_covered_by_mothers = [] columns_covered_by_mothers = set()
for dj in range(len(x_starting_all_between_nm_wc)): for dj in range(len(x_starting_all_between_nm_wc)):
columns_covered_by_mothers = columns_covered_by_mothers + \ columns_covered_by_mothers.update(
list(range(x_starting_all_between_nm_wc[dj], range(x_starting_all_between_nm_wc[dj],
x_ending_all_between_nm_wc[dj])) x_ending_all_between_nm_wc[dj]))
columns_covered_by_mothers = list(set(columns_covered_by_mothers)) child_columns = set(range(i_s_nc, x_end_biggest_column))
columns_not_covered = list(child_columns - columns_covered_by_mothers)
all_columns=np.arange(i_s_nc, x_end_biggest_column)
columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers))
should_longest_line_be_extended=0 should_longest_line_be_extended=0
if (len(x_diff_all_between_nm_wc) > 0 and if (len(x_diff_all_between_nm_wc) > 0 and
set(list(range(x_starting_all_between_nm_wc[biggest], set(list(range(x_starting_all_between_nm_wc[biggest],
x_ending_all_between_nm_wc[biggest])) + x_ending_all_between_nm_wc[biggest])) +
list(columns_not_covered)) != set(all_columns)): list(columns_not_covered)) != child_columns):
should_longest_line_be_extended=1 should_longest_line_be_extended=1
index_lines_so_close_to_top_separator = \ index_lines_so_close_to_top_separator = \
np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) & np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) &
@ -2092,36 +2082,31 @@ def return_boxes_of_images_by_order_of_reading_new(
x_start_by_order=[] x_start_by_order=[]
x_end_by_order=[] x_end_by_order=[]
if len(x_starting)>0: if len(x_starting)>0:
all_columns = np.arange(len(peaks_neg_tot)-1) columns_covered_by_lines_covered_more_than_2col = set()
columns_covered_by_lines_covered_more_than_2col = []
for dj in range(len(x_starting)): for dj in range(len(x_starting)):
if set(list(range(x_starting[dj],x_ending[dj]))) == set(all_columns): if set(range(x_starting[dj], x_ending[dj])) != all_columns:
pass columns_covered_by_lines_covered_more_than_2col.update(
else: range(x_starting[dj], x_ending[dj]))
columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + \ columns_not_covered = list(all_columns - columns_covered_by_lines_covered_more_than_2col)
list(range(x_starting[dj],x_ending[dj]))
columns_covered_by_lines_covered_more_than_2col = list(set(columns_covered_by_lines_covered_more_than_2col))
columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1)) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * (len(columns_not_covered) + 1))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered) x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
if len(new_main_sep_y) > 0: if len(new_main_sep_y) > 0:
x_starting = np.append(x_starting, 0) x_starting = np.append(x_starting, 0)
x_ending = np.append(x_ending, len(peaks_neg_tot)-1) x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
else: else:
x_starting = np.append(x_starting, x_starting[0]) x_starting = np.append(x_starting, x_starting[0])
x_ending = np.append(x_ending, x_ending[0]) x_ending = np.append(x_ending, x_ending[0])
else: else:
all_columns = np.arange(len(peaks_neg_tot)-1) columns_not_covered = list(all_columns)
columns_not_covered = list(set(all_columns))
y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered)) y_type_2 = np.append(y_type_2, [int(splitter_y_new[i])] * len(columns_not_covered))
##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered)) ##y_lines_by_order = np.append(y_lines_by_order, [int(splitter_y_new[i])] * len(columns_not_covered))
##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered)) ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
x_starting = np.append(x_starting, columns_not_covered) x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
x_ending = np.append(x_ending, np.array(columns_not_covered) + 1) x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
ind_args=np.array(range(len(y_type_2))) ind_args=np.array(range(len(y_type_2)))
#ind_args=np.array(ind_args) #ind_args=np.array(ind_args)