return_boxes_of_images_by_order_of_reading_new: re-indent

(dedent the body by removing the unnecessary `if True:` conditional)
2026-03-02 13:22:00 +01:00 · 2025-10-24 02:30:39 +02:00 · 2025-10-24 02:30:39 +02:00 · 3ebbc2d693
commit 3ebbc2d693
parent 66a0e55e49
1 changed file with 421 additions and 422 deletions
--- a/src/eynollah/utils/__init__.py
+++ b/src/eynollah/utils/__init__.py
@@ -1641,241 +1641,204 @@ def return_boxes_of_images_by_order_of_reading_new(
        #if (len(matrix_new[:,9][matrix_new[:,9]==1]) > 0 and
        #    np.max(matrix_new[:,8][matrix_new[:,9]==1]) >=
        #    0.1 * (np.abs(bot-top))):
-        if True:
+        try:
-            try:
+            num_col, peaks_neg_fin = find_num_col(
-                num_col, peaks_neg_fin = find_num_col(
+                regions_without_separators[top:bot],
-                    regions_without_separators[top:bot],
+                # we do not expect to get all columns in small parts (headings etc.):
-                    # we do not expect to get all columns in small parts (headings etc.):
+                num_col_classifier if bot - top >= big_part else 1,
-                    num_col_classifier if bot - top >= big_part else 1,
+                tables, multiplier=6. if erosion_hurts else 7.)
-                    tables, multiplier=6. if erosion_hurts else 7.)
+        except:
-            except:
+            peaks_neg_fin=[]
-                peaks_neg_fin=[]
+            num_col = 0
-                num_col = 0
+        try:
-            try:
+            if ((len(peaks_neg_fin) + 1 < num_col_classifier or
-                if ((len(peaks_neg_fin) + 1 < num_col_classifier or
+                num_col_classifier == 6) and
-                    num_col_classifier == 6) and
+                # we do not expect to get all columns in small parts (headings etc.):
-                    # we do not expect to get all columns in small parts (headings etc.):
+                bot - top >= big_part):
-                    bot - top >= big_part):
+                # found too few columns here
-                    # found too few columns here
+                #print('burda')
-                    #print('burda')
+                peaks_neg_fin_org = np.copy(peaks_neg_fin)
-                    peaks_neg_fin_org = np.copy(peaks_neg_fin)
+                #print("peaks_neg_fin_org", peaks_neg_fin_org)
-                    #print("peaks_neg_fin_org", peaks_neg_fin_org)
+                if len(peaks_neg_fin)==0:
-                    if len(peaks_neg_fin)==0:
+                    num_col, peaks_neg_fin = find_num_col(
-                        num_col, peaks_neg_fin = find_num_col(
+                        regions_without_separators[top:bot],
-                            regions_without_separators[top:bot],
+                        num_col_classifier, tables, multiplier=3.)
-                            num_col_classifier, tables, multiplier=3.)
+                #print(peaks_neg_fin,'peaks_neg_fin')
-                    #print(peaks_neg_fin,'peaks_neg_fin')
+                peaks_neg_fin_early = [0] + peaks_neg_fin + [width_tot-1]
                    peaks_neg_fin_early = [0] + peaks_neg_fin + [width_tot-1]
-                    #print(peaks_neg_fin_early,'burda2')
+                #print(peaks_neg_fin_early,'burda2')
-                    peaks_neg_fin_rev=[]
+                peaks_neg_fin_rev=[]
-                    for left, right in pairwise(peaks_neg_fin_early):
+                for left, right in pairwise(peaks_neg_fin_early):
-                        # print("%d:%d" % (left, right), 'i_n')
+                    # print("%d:%d" % (left, right), 'i_n')
-                        # dbg_plt([left, right, top, bot],
+                    # dbg_plt([left, right, top, bot],
-                        #         "image cut for y split %d:%d / x gap %d:%d" % (
+                    #         "image cut for y split %d:%d / x gap %d:%d" % (
-                        #             top, bot, left, right))
+                    #             top, bot, left, right))
-                        # plt.plot(regions_without_separators[top:bot, left:right].sum(axis=0))
+                    # plt.plot(regions_without_separators[top:bot, left:right].sum(axis=0))
-                        # plt.title("vertical projection (sum over y)")
+                    # plt.title("vertical projection (sum over y)")
-                        # plt.show()
+                    # plt.show()
-                        try:
+                    try:
-                            _, peaks_neg_fin1 = find_num_col(
+                        _, peaks_neg_fin1 = find_num_col(
-                                regions_without_separators[top:bot, left:right],
+                            regions_without_separators[top:bot, left:right],
-                                num_col_classifier, tables, multiplier=7.)
+                            num_col_classifier, tables, multiplier=7.)
-                        except:
+                    except:
-                            peaks_neg_fin1 = []
+                        peaks_neg_fin1 = []
-                        try:
+                    try:
-                            _, peaks_neg_fin2 = find_num_col(
+                        _, peaks_neg_fin2 = find_num_col(
-                                regions_without_separators[top:bot, left:right],
+                            regions_without_separators[top:bot, left:right],
-                                num_col_classifier, tables, multiplier=5.)
+                            num_col_classifier, tables, multiplier=5.)
-                        except:
+                    except:
-                            peaks_neg_fin2 = []
+                        peaks_neg_fin2 = []
-                        if len(peaks_neg_fin1) >= len(peaks_neg_fin2):
+                    if len(peaks_neg_fin1) >= len(peaks_neg_fin2):
-                            peaks_neg_fin = peaks_neg_fin1
+                        peaks_neg_fin = peaks_neg_fin1
                        else:
                            peaks_neg_fin = peaks_neg_fin2
                        # add offset to local result
                        peaks_neg_fin = list(np.array(peaks_neg_fin) + left)
                        #print(peaks_neg_fin,'peaks_neg_fin')
                        peaks_neg_fin_rev.extend(peaks_neg_fin)
                        if right < peaks_neg_fin_early[-1]:
                            # all but the last column: interject the preexisting boundary
                            peaks_neg_fin_rev.append(right)
                        #print(peaks_neg_fin_rev,'peaks_neg_fin_rev')
                    if len(peaks_neg_fin_rev) >= len(peaks_neg_fin_org):
                        peaks_neg_fin = peaks_neg_fin_rev
                    else:
-                        peaks_neg_fin = peaks_neg_fin_org
+                        peaks_neg_fin = peaks_neg_fin2
-                    num_col = len(peaks_neg_fin)
+                    # add offset to local result
                    peaks_neg_fin = list(np.array(peaks_neg_fin) + left)
                    #print(peaks_neg_fin,'peaks_neg_fin')
            except:
                logger.exception("cannot find peaks consistent with columns")
            #num_col, peaks_neg_fin = find_num_col(
            #    regions_without_separators[top:bot,:],
            #    multiplier=7.0)
            x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
            x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
            cy_hor_some=matrix_new[:,5][ (matrix_new[:,9]==0) ]
            y_max_hor_some=matrix_new[:,7][ (matrix_new[:,9]==0) ]
-            if right2left_readingorder:
+                    peaks_neg_fin_rev.extend(peaks_neg_fin)
-                x_max_hor_some_new = width_tot - x_min_hor_some
+                    if right < peaks_neg_fin_early[-1]:
-                x_min_hor_some_new = width_tot - x_max_hor_some
+                        # all but the last column: interject the preexisting boundary
-                x_min_hor_some =list(np.copy(x_min_hor_some_new))
+                        peaks_neg_fin_rev.append(right)
-                x_max_hor_some =list(np.copy(x_max_hor_some_new))
+                    #print(peaks_neg_fin_rev,'peaks_neg_fin_rev')
-            peaks_neg_tot = np.array([0] + peaks_neg_fin + [width_tot])
+                if len(peaks_neg_fin_rev) >= len(peaks_neg_fin_org):
-            #print(peaks_neg_tot,'peaks_neg_tot')
+                    peaks_neg_fin = peaks_neg_fin_rev
-            peaks_neg_tot_tables.append(peaks_neg_tot)
+                else:
                    peaks_neg_fin = peaks_neg_fin_org
                num_col = len(peaks_neg_fin)
                #print(peaks_neg_fin,'peaks_neg_fin')
        except:
            logger.exception("cannot find peaks consistent with columns")
        #num_col, peaks_neg_fin = find_num_col(
        #    regions_without_separators[top:bot,:],
        #    multiplier=7.0)
        x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
        x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
        cy_hor_some=matrix_new[:,5][ (matrix_new[:,9]==0) ]
        y_max_hor_some=matrix_new[:,7][ (matrix_new[:,9]==0) ]
-            all_columns = set(range(len(peaks_neg_tot) - 1))
+        if right2left_readingorder:
-            #print("all_columns", all_columns)
+            x_max_hor_some_new = width_tot - x_min_hor_some
            x_min_hor_some_new = width_tot - x_max_hor_some
            x_min_hor_some =list(np.copy(x_min_hor_some_new))
            x_max_hor_some =list(np.copy(x_max_hor_some_new))
-            reading_order_type, x_starting, x_ending, y_mid, y_max, \
+        peaks_neg_tot = np.array([0] + peaks_neg_fin + [width_tot])
-                y_mid_without_mother, x_start_without_mother, x_end_without_mother, \
+        #print(peaks_neg_tot,'peaks_neg_tot')
-                there_is_sep_with_child, \
+        peaks_neg_tot_tables.append(peaks_neg_tot)
                y_mid_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
                new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
                    x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, y_max_hor_some)
-            # show multi-column separators
+        all_columns = set(range(len(peaks_neg_tot) - 1))
-            # dbg_plt([0, None, top, bot], "multi-column separators in current split", 
+        #print("all_columns", all_columns)
            #         list(zip(peaks_neg_tot[x_starting], peaks_neg_tot[x_ending],
            #                  y_mid - top, y_max - top)), True)
-            if (reading_order_type == 1 or
+        reading_order_type, x_starting, x_ending, y_mid, y_max, \
-                len(y_mid_without_mother) >= 2 or
+            y_mid_without_mother, x_start_without_mother, x_end_without_mother, \
-                there_is_sep_with_child == 1):
+            there_is_sep_with_child, \
-                # there are top-level multi-colspan horizontal separators which overlap each other
+            y_mid_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, \
-                # or multiple top-level multi-colspan horizontal separators
+            new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
-                # or multi-colspan horizontal separators shorter than their respective top-level:
+                x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, y_max_hor_some)
                # todo: explain how this is dealt with
                try:
                    y_grenze = top + 300
                    up = (y_mid > top) & (y_mid <= y_grenze)
-                    args_early_ys=np.arange(len(y_mid))
+        # show multi-column separators
-                    #print(args_early_ys,'args_early_ys')
+        # dbg_plt([0, None, top, bot], "multi-column separators in current split", 
-                    #print(y_mid,'y_mid')
+        #         list(zip(peaks_neg_tot[x_starting], peaks_neg_tot[x_ending],
        #                  y_mid - top, y_max - top)), True)
-                    x_starting_up = x_starting[up]
+        if (reading_order_type == 1 or
-                    x_ending_up = x_ending[up]
+            len(y_mid_without_mother) >= 2 or
-                    y_mid_up = y_mid[up]
+            there_is_sep_with_child == 1):
-                    y_max_up = y_max[up]
+            # there are top-level multi-colspan horizontal separators which overlap each other
-                    args_up = args_early_ys[up]
+            # or multiple top-level multi-colspan horizontal separators
-                    #print(args_up,'args_up')
+            # or multi-colspan horizontal separators shorter than their respective top-level:
-                    #print(y_mid_up,'y_mid_up')
+            # todo: explain how this is dealt with
-                    #check if there is a big separator in this y_mains0
+            try:
-                    if len(y_mid_up) > 0:
+                y_grenze = top + 300
-                        # is there a separator with full-width span?
+                up = (y_mid > top) & (y_mid <= y_grenze)
                        main_separator = (x_starting_up == 0) & (x_ending_up == len(peaks_neg_tot) - 1)
                        y_mid_main_separator_up = y_mid_up[main_separator]
                        y_max_main_separator_up = y_max_up[main_separator]
                        args_main_to_deleted = args_up[main_separator]
                        #print(y_mid_main_separator_up,y_max_main_separator_up,args_main_to_deleted,'fffffjammmm')
                        if len(y_max_main_separator_up):
                            args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
                            #print(args_to_be_kept,'args_to_be_kept')
                            boxes.append([0, peaks_neg_tot[-1],
                                          top, y_max_main_separator_up.max()])
                            # dbg_plt(boxes[-1], "near top main separator box")
                            top = y_max_main_separator_up.max()
-                            #print(top,'top')
+                args_early_ys=np.arange(len(y_mid))
-                            y_mid = y_mid[args_to_be_kept]
+                #print(args_early_ys,'args_early_ys')
-                            x_starting = x_starting[args_to_be_kept]
+                #print(y_mid,'y_mid')
                            x_ending = x_ending[args_to_be_kept]
                            y_max = y_max[args_to_be_kept]
-                            #print('galdiha')
+                x_starting_up = x_starting[up]
-                            y_grenze = top + 200
+                x_ending_up = x_ending[up]
-                            up = (y_mid > top) & (y_mid <= y_grenze)
+                y_mid_up = y_mid[up]
-                            args_early_ys2 = np.arange(len(y_mid))
+                y_max_up = y_max[up]
-                            x_starting_up = x_starting[up]
+                args_up = args_early_ys[up]
-                            x_ending_up = x_ending[up]
+                #print(args_up,'args_up')
-                            y_mid_up = y_mid[up]
+                #print(y_mid_up,'y_mid_up')
-                            y_max_up = y_max[up]
+                #check if there is a big separator in this y_mains0
-                            args_up2 = args_early_ys2[up]
+                if len(y_mid_up) > 0:
-                            #print(y_mid_up,x_starting_up,x_ending_up,'didid')
+                    # is there a separator with full-width span?
-                        else:
+                    main_separator = (x_starting_up == 0) & (x_ending_up == len(peaks_neg_tot) - 1)
-                            args_early_ys2 = args_early_ys
+                    y_mid_main_separator_up = y_mid_up[main_separator]
-                            args_up2 = args_up
+                    y_max_main_separator_up = y_max_up[main_separator]
                    args_main_to_deleted = args_up[main_separator]
                    #print(y_mid_main_separator_up,y_max_main_separator_up,args_main_to_deleted,'fffffjammmm')
                    if len(y_max_main_separator_up):
                        args_to_be_kept = np.array(list( set(args_early_ys) - set(args_main_to_deleted) ))
                        #print(args_to_be_kept,'args_to_be_kept')
                        boxes.append([0, peaks_neg_tot[-1],
                                      top, y_max_main_separator_up.max()])
                        # dbg_plt(boxes[-1], "near top main separator box")
                        top = y_max_main_separator_up.max()
-                        nodes_in = set()
+                        #print(top,'top')
-                        for ij in range(len(x_starting_up)):
+                        y_mid = y_mid[args_to_be_kept]
-                            nodes_in.update(range(x_starting_up[ij],
+                        x_starting = x_starting[args_to_be_kept]
-                                                  x_ending_up[ij]))
+                        x_ending = x_ending[args_to_be_kept]
-                        #print(nodes_in,'nodes_in')
+                        y_max = y_max[args_to_be_kept]
                        #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
-                        if nodes_in == set(range(len(peaks_neg_tot)-1)):
+                        #print('galdiha')
-                            pass
+                        y_grenze = top + 200
-                        elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
+                        up = (y_mid > top) & (y_mid <= y_grenze)
-                            pass
+                        args_early_ys2 = np.arange(len(y_mid))
-                        else:
+                        x_starting_up = x_starting[up]
-                            #print('burdaydikh')
+                        x_ending_up = x_ending[up]
-                            args_to_be_kept2 = np.array(list( set(args_early_ys2) - set(args_up2) ))
+                        y_mid_up = y_mid[up]
-
+                        y_max_up = y_max[up]
-                            if len(args_to_be_kept2):
+                        args_up2 = args_early_ys2[up]
-                                #print(args_to_be_kept2, "args_to_be_kept2")
+                        #print(y_mid_up,x_starting_up,x_ending_up,'didid')
                                y_mid = y_mid[args_to_be_kept2]
                                x_starting = x_starting[args_to_be_kept2]
                                x_ending = x_ending[args_to_be_kept2]
                                y_max = y_max[args_to_be_kept2]
                    #int(top)
                    # order multi-column separators
                    y_mid_by_order=[]
                    x_start_by_order=[]
                    x_end_by_order=[]
                    if (reading_order_type == 1 or
                        len(x_end_with_child_without_mother) == 0):
                        if reading_order_type == 1:
                            # there are top-level multi-colspan horizontal separators which overlap each other
                            #print("adding all columns at top because of multiple overlapping mothers")
                            y_mid_by_order.append(top)
                            x_start_by_order.append(0)
                            x_end_by_order.append(len(peaks_neg_tot)-2)
                        else:
                            # there are no top-level multi-colspan horizontal separators which themselves
                            # contain shorter multi-colspan separators
                            #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
                            columns_covered_by_mothers = set()
                            for dj in range(len(x_start_without_mother)):
                                columns_covered_by_mothers.update(
                                    range(x_start_without_mother[dj],
                                          x_end_without_mother[dj]))
                            columns_not_covered = list(all_columns - columns_covered_by_mothers)
                            #print(columns_covered_by_mothers, "columns_covered_by_mothers")
                            #print(columns_not_covered, "columns_not_covered")
                            y_mid = np.append(y_mid, np.ones(len(columns_not_covered) +
                                                             len(x_start_without_mother),
                                                             dtype=int) * top)
                            ##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
                            ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
                            x_starting = np.append(x_starting, np.array(columns_not_covered, int))
                            x_starting = np.append(x_starting, x_start_without_mother)
                            x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                            x_ending = np.append(x_ending, x_end_without_mother)
                        ind_args=np.arange(len(y_mid))
                        #print(ind_args,'ind_args')
                        for column in range(len(peaks_neg_tot)-1):
                            #print(column,'column')
                            ind_args_in_col=ind_args[x_starting==column]
                            #print('babali2')
                            #print(ind_args_in_col,'ind_args_in_col')
                            #print(len(y_mid))
                            y_mid_column=y_mid[ind_args_in_col]
                            x_start_column=x_starting[ind_args_in_col]
                            x_end_column=x_ending[ind_args_in_col]
                            #print('babali3')
                            ind_args_col_sorted=np.argsort(y_mid_column)
                            y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
                            x_start_by_order.extend(x_start_column[ind_args_col_sorted])
                            x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
                    else:
                        args_early_ys2 = args_early_ys
                        args_up2 = args_up
                    nodes_in = set()
                    for ij in range(len(x_starting_up)):
                        nodes_in.update(range(x_starting_up[ij],
                                              x_ending_up[ij]))
                    #print(nodes_in,'nodes_in')
                    #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))')
                    if nodes_in == set(range(len(peaks_neg_tot)-1)):
                        pass
                    elif nodes_in == set(range(1, len(peaks_neg_tot)-1)):
                        pass
                    else:
                        #print('burdaydikh')
                        args_to_be_kept2 = np.array(list( set(args_early_ys2) - set(args_up2) ))
                        if len(args_to_be_kept2):
                            #print(args_to_be_kept2, "args_to_be_kept2")
                            y_mid = y_mid[args_to_be_kept2]
                            x_starting = x_starting[args_to_be_kept2]
                            x_ending = x_ending[args_to_be_kept2]
                            y_max = y_max[args_to_be_kept2]
                #int(top)
                # order multi-column separators
                y_mid_by_order=[]
                x_start_by_order=[]
                x_end_by_order=[]
                if (reading_order_type == 1 or
                    len(x_end_with_child_without_mother) == 0):
                    if reading_order_type == 1:
                        # there are top-level multi-colspan horizontal separators which overlap each other
                        #print("adding all columns at top because of multiple overlapping mothers")
                        y_mid_by_order.append(top)
                        x_start_by_order.append(0)
                        x_end_by_order.append(len(peaks_neg_tot)-2)
                    else:
                        # there are no top-level multi-colspan horizontal separators which themselves
                        # contain shorter multi-colspan separators
                        #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
                        columns_covered_by_mothers = set()
                        for dj in range(len(x_start_without_mother)):
@ -1895,212 +1858,170 @@ def return_boxes_of_images_by_order_of_reading_new(
                        x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                        x_ending = np.append(x_ending, x_end_without_mother)
-                        columns_covered_by_mothers_with_child = set()
+                    ind_args=np.arange(len(y_mid))
-                        for dj in range(len(x_end_with_child_without_mother)):
+                    #print(ind_args,'ind_args')
-                            columns_covered_by_mothers_with_child.update(
+                    for column in range(len(peaks_neg_tot)-1):
-                                range(x_start_with_child_without_mother[dj],
+                        #print(column,'column')
-                                      x_end_with_child_without_mother[dj]))
+                        ind_args_in_col=ind_args[x_starting==column]
-                        #print(columns_covered_by_mothers_with_child, "columns_covered_by_mothers_with_child")
+                        #print('babali2')
-                        columns_not_covered_by_mothers_with_child = list(
+                        #print(ind_args_in_col,'ind_args_in_col')
-                            all_columns - columns_covered_by_mothers_with_child)
+                        #print(len(y_mid))
-                        #indexes_to_be_spanned=[]
+                        y_mid_column=y_mid[ind_args_in_col]
-                        for i_s in range(len(x_end_with_child_without_mother)):
+                        x_start_column=x_starting[ind_args_in_col]
-                            columns_not_covered_by_mothers_with_child.append(x_start_with_child_without_mother[i_s])
+                        x_end_column=x_ending[ind_args_in_col]
-                        columns_not_covered_by_mothers_with_child = np.sort(columns_not_covered_by_mothers_with_child)
+                        #print('babali3')
-                        #print(columns_not_covered_by_mothers_with_child, "columns_not_covered_by_mothers_with_child")
+                        ind_args_col_sorted=np.argsort(y_mid_column)
-                        ind_args = np.arange(len(y_mid))
+                        y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
-                        for i_s_nc in columns_not_covered_by_mothers_with_child:
+                        x_start_by_order.extend(x_start_column[ind_args_col_sorted])
-                            if i_s_nc in x_start_with_child_without_mother:
+                        x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
                                # use only seps with mother's span ("biggest")
                                #print("i_s_nc", i_s_nc)
                                x_end_biggest_column = \
                                    x_end_with_child_without_mother[
                                        x_start_with_child_without_mother == i_s_nc][0]
                                args_all_biggest_seps = \
                                    ind_args[(x_starting == i_s_nc) &
                                             (x_ending == x_end_biggest_column)]
                                y_mid_column_nc = y_mid[args_all_biggest_seps]
                                #print("%d:%d" % (i_s_nc, x_end_biggest_column), "columns covered by mother with child")
                                #x_start_column_nc = x_starting[args_all_biggest_seps]
                                #x_end_column_nc = x_ending[args_all_biggest_seps]
                                y_mid_column_nc = np.sort(y_mid_column_nc)
                                #print(y_mid_column_nc, "y_mid_column_nc (sorted)")
                                for nc_top, nc_bot in pairwise(np.append(y_mid_column_nc, bot)):
                                    #print("i_c", i_c)
                                    #print("%d:%d" % (nc_top, nc_bot), "y_mid_column_nc")
                                    ind_all_seps_between_nm_wc = \
                                        ind_args[(y_mid > nc_top) &
                                                 (y_mid < nc_bot) &
                                                 (x_starting >= i_s_nc) &
                                                 (x_ending <= x_end_biggest_column)]
                                    y_mid_all_between_nm_wc = y_mid[ind_all_seps_between_nm_wc]
                                    x_starting_all_between_nm_wc = x_starting[ind_all_seps_between_nm_wc]
                                    x_ending_all_between_nm_wc = x_ending[ind_all_seps_between_nm_wc]
                                    columns_covered_by_mothers = set()
                                    for dj in range(len(ind_all_seps_between_nm_wc)):
                                        columns_covered_by_mothers.update(
                                            range(x_starting_all_between_nm_wc[dj],
                                                  x_ending_all_between_nm_wc[dj]))
                                    #print(columns_covered_by_mothers, "columns_covered_by_mothers")
                                    child_columns = set(range(i_s_nc, x_end_biggest_column))
                                    columns_not_covered = list(child_columns - columns_covered_by_mothers)
                                    #print(child_columns, "child_columns")
                                    #print(columns_not_covered, "columns_not_covered")
                                    if len(ind_all_seps_between_nm_wc):
                                        biggest = np.argmax(x_ending_all_between_nm_wc -
                                                            x_starting_all_between_nm_wc)
                                        #print(ind_all_seps_between_nm_wc, "ind_all_seps_between_nm_wc")
                                        #print(biggest, "%d:%d" % (x_starting_all_between_nm_wc[biggest],
                                        #                          x_ending_all_between_nm_wc[biggest]), "biggest")
                                        if columns_covered_by_mothers == set(
                                                range(x_starting_all_between_nm_wc[biggest],
                                                      x_ending_all_between_nm_wc[biggest])):
                                            # single biggest accounts for all covered columns alone,
                                            # this separator should be extended to cover all
                                            seps_too_close_to_top_separator = \
                                                ((y_mid_all_between_nm_wc > nc_top) &
                                                 (y_mid_all_between_nm_wc <= nc_top + 500))
                                            if (np.count_nonzero(seps_too_close_to_top_separator) and
                                                np.count_nonzero(seps_too_close_to_top_separator) <
                                                len(ind_all_seps_between_nm_wc)):
                                                #print(seps_too_close_to_top_separator, "seps_too_close_to_top_separator")
                                                y_mid_all_between_nm_wc = \
                                                    y_mid_all_between_nm_wc[~seps_too_close_to_top_separator]
                                                x_starting_all_between_nm_wc = \
                                                    x_starting_all_between_nm_wc[~seps_too_close_to_top_separator]
                                                x_ending_all_between_nm_wc = \
                                                    x_ending_all_between_nm_wc[~seps_too_close_to_top_separator]
                                            y_mid_all_between_nm_wc = np.append(
                                                y_mid_all_between_nm_wc, nc_top)
                                            x_starting_all_between_nm_wc = np.append(
                                                x_starting_all_between_nm_wc, i_s_nc)
                                            x_ending_all_between_nm_wc = np.append(
                                                x_ending_all_between_nm_wc, x_end_biggest_column)
                                        else:
                                            y_mid_all_between_nm_wc = np.append(
                                                y_mid_all_between_nm_wc, nc_top)
                                            x_starting_all_between_nm_wc = np.append(
                                                x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
                                            x_ending_all_between_nm_wc = np.append(
                                                x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
                                    if len(columns_not_covered):
                                        y_mid_all_between_nm_wc = np.append(
                                            y_mid_all_between_nm_wc, [nc_top] * len(columns_not_covered))
                                        x_starting_all_between_nm_wc = np.append(
                                            x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
                                        x_ending_all_between_nm_wc = np.append(
                                            x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
                                    ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
                                    for column in range(int(i_s_nc), int(x_end_biggest_column)):
                                        ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column]
                                        #print('babali2')
                                        #print(ind_args_in_col,'ind_args_in_col')
                                        #print(len(y_mid))
                                        y_mid_column=y_mid_all_between_nm_wc[ind_args_in_col]
                                        x_start_column=x_starting_all_between_nm_wc[ind_args_in_col]
                                        x_end_column=x_ending_all_between_nm_wc[ind_args_in_col]
                                        #print('babali3')
                                        ind_args_col_sorted=np.argsort(y_mid_column)
                                        y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
                                        x_start_by_order.extend(x_start_column[ind_args_col_sorted])
                                        x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
                            else:
                                #print(i_s_nc,'column not covered by mothers with child')
                                ind_args_in_col=ind_args[x_starting==i_s_nc]
                                #print('babali2')
                                #print(ind_args_in_col,'ind_args_in_col')
                                #print(len(y_mid))
                                y_mid_column=y_mid[ind_args_in_col]
                                x_start_column=x_starting[ind_args_in_col]
                                x_end_column=x_ending[ind_args_in_col]
                                #print('babali3')
                                ind_args_col_sorted = np.argsort(y_mid_column)
                                y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
                                x_start_by_order.extend(x_start_column[ind_args_col_sorted])
                                x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
                    # create single-column boxes from multi-column separators
                    y_mid_by_order = np.array(y_mid_by_order)
                    x_start_by_order = np.array(x_start_by_order)
                    x_end_by_order = np.array(x_end_by_order)
                    for il in range(len(y_mid_by_order)):
                        #print(il, "il")
                        y_mid_itself = y_mid_by_order[il]
                        x_start_itself = x_start_by_order[il]
                        x_end_itself = x_end_by_order[il]
                        for column in range(int(x_start_itself), int(x_end_itself)+1):
                            #print(column,'cols')
                            #print('burda')
                            #print('burda2')
                            y_mid_next = y_mid_by_order[(y_mid_itself < y_mid_by_order) &
                                                        (column >= x_start_by_order) &
                                                        (column <= x_end_by_order)]
                            y_mid_next = y_mid_next.min(initial=bot)
                            #print(y_mid_next,'y_mid_next')
                            #print(y_mid_itself,'y_mid_itself')
                            boxes.append([peaks_neg_tot[column],
                                          peaks_neg_tot[column+1],
                                          y_mid_itself,
                                          y_mid_next])
                            # dbg_plt(boxes[-1], "A column %d box" % (column + 1))
                except:
                    logger.exception("cannot assign boxes")
                    boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
                                  top, bot])
                    # dbg_plt(boxes[-1], "fallback box")
            else:
                # order multi-column separators
                y_mid_by_order=[]
                x_start_by_order=[]
                x_end_by_order=[]
                if len(x_starting)>0:
                    columns_covered_by_seps_covered_more_than_2col = set()
                    for dj in range(len(x_starting)):
                        if set(range(x_starting[dj], x_ending[dj])) != all_columns:
                            columns_covered_by_seps_covered_more_than_2col.update(
                                range(x_starting[dj], x_ending[dj]))
                    columns_not_covered = list(all_columns - columns_covered_by_seps_covered_more_than_2col)
                    y_mid = np.append(y_mid, np.ones(len(columns_not_covered) + 1,
                                                     dtype=int) * top)
                    ##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
                    ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
                    x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
                    x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
                    if len(new_main_sep_y) > 0:
                        x_starting = np.append(x_starting, 0)
                        x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
                    else:
                        x_starting = np.append(x_starting, x_starting[0])
                        x_ending = np.append(x_ending, x_ending[0])
                else:
-                    columns_not_covered = list(all_columns)
+                    #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo')
-                    y_mid = np.append(y_mid, np.ones(len(columns_not_covered),
+                    columns_covered_by_mothers = set()
                    for dj in range(len(x_start_without_mother)):
                        columns_covered_by_mothers.update(
                            range(x_start_without_mother[dj],
                                  x_end_without_mother[dj]))
                    columns_not_covered = list(all_columns - columns_covered_by_mothers)
                    #print(columns_covered_by_mothers, "columns_covered_by_mothers")
                    #print(columns_not_covered, "columns_not_covered")
                    y_mid = np.append(y_mid, np.ones(len(columns_not_covered) +
                                                     len(x_start_without_mother),
                                                     dtype=int) * top)
                    ##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
                    ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
-                    x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
+                    x_starting = np.append(x_starting, np.array(columns_not_covered, int))
-                    x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
+                    x_starting = np.append(x_starting, x_start_without_mother)
                    x_ending = np.append(x_ending, np.array(columns_not_covered, int) + 1)
                    x_ending = np.append(x_ending, x_end_without_mother)
-                ind_args = np.arange(len(y_mid))
+                    columns_covered_by_mothers_with_child = set()
                    for dj in range(len(x_end_with_child_without_mother)):
                        columns_covered_by_mothers_with_child.update(
                            range(x_start_with_child_without_mother[dj],
                                  x_end_with_child_without_mother[dj]))
                    #print(columns_covered_by_mothers_with_child, "columns_covered_by_mothers_with_child")
                    columns_not_covered_by_mothers_with_child = list(
                        all_columns - columns_covered_by_mothers_with_child)
                    #indexes_to_be_spanned=[]
                    for i_s in range(len(x_end_with_child_without_mother)):
                        columns_not_covered_by_mothers_with_child.append(x_start_with_child_without_mother[i_s])
                    columns_not_covered_by_mothers_with_child = np.sort(columns_not_covered_by_mothers_with_child)
                    #print(columns_not_covered_by_mothers_with_child, "columns_not_covered_by_mothers_with_child")
                    ind_args = np.arange(len(y_mid))
                    for i_s_nc in columns_not_covered_by_mothers_with_child:
                        if i_s_nc in x_start_with_child_without_mother:
                            # use only seps with mother's span ("biggest")
                            #print("i_s_nc", i_s_nc)
                            x_end_biggest_column = \
                                x_end_with_child_without_mother[
                                    x_start_with_child_without_mother == i_s_nc][0]
                            args_all_biggest_seps = \
                                ind_args[(x_starting == i_s_nc) &
                                         (x_ending == x_end_biggest_column)]
                            y_mid_column_nc = y_mid[args_all_biggest_seps]
                            #print("%d:%d" % (i_s_nc, x_end_biggest_column), "columns covered by mother with child")
                            #x_start_column_nc = x_starting[args_all_biggest_seps]
                            #x_end_column_nc = x_ending[args_all_biggest_seps]
                            y_mid_column_nc = np.sort(y_mid_column_nc)
                            #print(y_mid_column_nc, "y_mid_column_nc (sorted)")
                            for nc_top, nc_bot in pairwise(np.append(y_mid_column_nc, bot)):
                                #print("i_c", i_c)
                                #print("%d:%d" % (nc_top, nc_bot), "y_mid_column_nc")
                                ind_all_seps_between_nm_wc = \
                                    ind_args[(y_mid > nc_top) &
                                             (y_mid < nc_bot) &
                                             (x_starting >= i_s_nc) &
                                             (x_ending <= x_end_biggest_column)]
                                y_mid_all_between_nm_wc = y_mid[ind_all_seps_between_nm_wc]
                                x_starting_all_between_nm_wc = x_starting[ind_all_seps_between_nm_wc]
                                x_ending_all_between_nm_wc = x_ending[ind_all_seps_between_nm_wc]
-                for column in range(len(peaks_neg_tot)-1):
+                                columns_covered_by_mothers = set()
-                    #print(column,'column')
+                                for dj in range(len(ind_all_seps_between_nm_wc)):
-                    ind_args_in_col=ind_args[x_starting==column]
+                                    columns_covered_by_mothers.update(
-                    #print(len(y_mid))
+                                        range(x_starting_all_between_nm_wc[dj],
-                    y_mid_column=y_mid[ind_args_in_col]
+                                              x_ending_all_between_nm_wc[dj]))
-                    x_start_column=x_starting[ind_args_in_col]
+                                #print(columns_covered_by_mothers, "columns_covered_by_mothers")
-                    x_end_column=x_ending[ind_args_in_col]
+                                child_columns = set(range(i_s_nc, x_end_biggest_column))
                                columns_not_covered = list(child_columns - columns_covered_by_mothers)
                                #print(child_columns, "child_columns")
                                #print(columns_not_covered, "columns_not_covered")
-                    ind_args_col_sorted = np.argsort(y_mid_column)
+                                if len(ind_all_seps_between_nm_wc):
-                    y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
+                                    biggest = np.argmax(x_ending_all_between_nm_wc -
-                    x_start_by_order.extend(x_start_column[ind_args_col_sorted])
+                                                        x_starting_all_between_nm_wc)
-                    x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
+                                    #print(ind_all_seps_between_nm_wc, "ind_all_seps_between_nm_wc")
                                    #print(biggest, "%d:%d" % (x_starting_all_between_nm_wc[biggest],
                                    #                          x_ending_all_between_nm_wc[biggest]), "biggest")
                                    if columns_covered_by_mothers == set(
                                            range(x_starting_all_between_nm_wc[biggest],
                                                  x_ending_all_between_nm_wc[biggest])):
                                        # single biggest accounts for all covered columns alone,
                                        # this separator should be extended to cover all
                                        seps_too_close_to_top_separator = \
                                            ((y_mid_all_between_nm_wc > nc_top) &
                                             (y_mid_all_between_nm_wc <= nc_top + 500))
                                        if (np.count_nonzero(seps_too_close_to_top_separator) and
                                            np.count_nonzero(seps_too_close_to_top_separator) <
                                            len(ind_all_seps_between_nm_wc)):
                                            #print(seps_too_close_to_top_separator, "seps_too_close_to_top_separator")
                                            y_mid_all_between_nm_wc = \
                                                y_mid_all_between_nm_wc[~seps_too_close_to_top_separator]
                                            x_starting_all_between_nm_wc = \
                                                x_starting_all_between_nm_wc[~seps_too_close_to_top_separator]
                                            x_ending_all_between_nm_wc = \
                                                x_ending_all_between_nm_wc[~seps_too_close_to_top_separator]
                                        y_mid_all_between_nm_wc = np.append(
                                            y_mid_all_between_nm_wc, nc_top)
                                        x_starting_all_between_nm_wc = np.append(
                                            x_starting_all_between_nm_wc, i_s_nc)
                                        x_ending_all_between_nm_wc = np.append(
                                            x_ending_all_between_nm_wc, x_end_biggest_column)
                                    else:
                                        y_mid_all_between_nm_wc = np.append(
                                            y_mid_all_between_nm_wc, nc_top)
                                        x_starting_all_between_nm_wc = np.append(
                                            x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
                                        x_ending_all_between_nm_wc = np.append(
                                            x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
                                if len(columns_not_covered):
                                    y_mid_all_between_nm_wc = np.append(
                                        y_mid_all_between_nm_wc, [nc_top] * len(columns_not_covered))
                                    x_starting_all_between_nm_wc = np.append(
                                        x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
                                    x_ending_all_between_nm_wc = np.append(
                                        x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
                                ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
                                for column in range(int(i_s_nc), int(x_end_biggest_column)):
                                    ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column]
                                    #print('babali2')
                                    #print(ind_args_in_col,'ind_args_in_col')
                                    #print(len(y_mid))
                                    y_mid_column=y_mid_all_between_nm_wc[ind_args_in_col]
                                    x_start_column=x_starting_all_between_nm_wc[ind_args_in_col]
                                    x_end_column=x_ending_all_between_nm_wc[ind_args_in_col]
                                    #print('babali3')
                                    ind_args_col_sorted=np.argsort(y_mid_column)
                                    y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
                                    x_start_by_order.extend(x_start_column[ind_args_col_sorted])
                                    x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
                        else:
                            #print(i_s_nc,'column not covered by mothers with child')
                            ind_args_in_col=ind_args[x_starting==i_s_nc]
                            #print('babali2')
                            #print(ind_args_in_col,'ind_args_in_col')
                            #print(len(y_mid))
                            y_mid_column=y_mid[ind_args_in_col]
                            x_start_column=x_starting[ind_args_in_col]
                            x_end_column=x_ending[ind_args_in_col]
                            #print('babali3')
                            ind_args_col_sorted = np.argsort(y_mid_column)
                            y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
                            x_start_by_order.extend(x_start_column[ind_args_col_sorted])
                            x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
                # create single-column boxes from multi-column separators
                y_mid_by_order = np.array(y_mid_by_order)
@ -2109,23 +2030,101 @@ def return_boxes_of_images_by_order_of_reading_new(
                for il in range(len(y_mid_by_order)):
                    #print(il, "il")
                    y_mid_itself = y_mid_by_order[il]
                    #print(y_mid_itself,'y_mid_itself')
                    x_start_itself = x_start_by_order[il]
                    x_end_itself = x_end_by_order[il]
-                    for column in range(x_start_itself, x_end_itself+1):
+                    for column in range(int(x_start_itself), int(x_end_itself)+1):
                        #print(column,'cols')
                        #print('burda')
                        #print('burda2')
                        y_mid_next = y_mid_by_order[(y_mid_itself < y_mid_by_order) &
                                                    (column >= x_start_by_order) &
                                                    (column <= x_end_by_order)]
                        #print(y_mid_next,'y_mid_next')
                        y_mid_next = y_mid_next.min(initial=bot)
                        #print(y_mid_next,'y_mid_next')
                        #print(y_mid_itself,'y_mid_itself')
                        boxes.append([peaks_neg_tot[column],
                                      peaks_neg_tot[column+1],
                                      y_mid_itself,
                                      y_mid_next])
-                        # dbg_plt(boxes[-1], "B column %d box" % (column + 1))
+                        # dbg_plt(boxes[-1], "A column %d box" % (column + 1))
            except:
                logger.exception("cannot assign boxes")
                boxes.append([0, peaks_neg_tot[len(peaks_neg_tot)-1],
                              top, bot])
                # dbg_plt(boxes[-1], "fallback box")
        else:
            # order multi-column separators
            y_mid_by_order=[]
            x_start_by_order=[]
            x_end_by_order=[]
            if len(x_starting)>0:
                columns_covered_by_seps_covered_more_than_2col = set()
                for dj in range(len(x_starting)):
                    if set(range(x_starting[dj], x_ending[dj])) != all_columns:
                        columns_covered_by_seps_covered_more_than_2col.update(
                            range(x_starting[dj], x_ending[dj]))
                columns_not_covered = list(all_columns - columns_covered_by_seps_covered_more_than_2col)
                y_mid = np.append(y_mid, np.ones(len(columns_not_covered) + 1,
                                                 dtype=int) * top)
                ##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
                ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
                x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
                x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
                if len(new_main_sep_y) > 0:
                    x_starting = np.append(x_starting, 0)
                    x_ending = np.append(x_ending, len(peaks_neg_tot) - 1)
                else:
                    x_starting = np.append(x_starting, x_starting[0])
                    x_ending = np.append(x_ending, x_ending[0])
            else:
                columns_not_covered = list(all_columns)
                y_mid = np.append(y_mid, np.ones(len(columns_not_covered),
                                                 dtype=int) * top)
                ##y_mid_by_order = np.append(y_mid_by_order, [top] * len(columns_not_covered))
                ##x_start_by_order = np.append(x_start_by_order, [0] * len(columns_not_covered))
                x_starting = np.append(x_starting, np.array(columns_not_covered, x_starting.dtype))
                x_ending = np.append(x_ending, np.array(columns_not_covered, x_ending.dtype) + 1)
            ind_args = np.arange(len(y_mid))
            for column in range(len(peaks_neg_tot)-1):
                #print(column,'column')
                ind_args_in_col=ind_args[x_starting==column]
                #print(len(y_mid))
                y_mid_column=y_mid[ind_args_in_col]
                x_start_column=x_starting[ind_args_in_col]
                x_end_column=x_ending[ind_args_in_col]
                ind_args_col_sorted = np.argsort(y_mid_column)
                y_mid_by_order.extend(y_mid_column[ind_args_col_sorted])
                x_start_by_order.extend(x_start_column[ind_args_col_sorted])
                x_end_by_order.extend(x_end_column[ind_args_col_sorted] - 1)
            # create single-column boxes from multi-column separators
            y_mid_by_order = np.array(y_mid_by_order)
            x_start_by_order = np.array(x_start_by_order)
            x_end_by_order = np.array(x_end_by_order)
            for il in range(len(y_mid_by_order)):
                #print(il, "il")
                y_mid_itself = y_mid_by_order[il]
                #print(y_mid_itself,'y_mid_itself')
                x_start_itself = x_start_by_order[il]
                x_end_itself = x_end_by_order[il]
                for column in range(x_start_itself, x_end_itself+1):
                    #print(column,'cols')
                    #print('burda2')
                    y_mid_next = y_mid_by_order[(y_mid_itself < y_mid_by_order) &
                                                (column >= x_start_by_order) &
                                                (column <= x_end_by_order)]
                    #print(y_mid_next,'y_mid_next')
                    y_mid_next = y_mid_next.min(initial=bot)
                    #print(y_mid_next,'y_mid_next')
                    boxes.append([peaks_neg_tot[column],
                                  peaks_neg_tot[column+1],
                                  y_mid_itself,
                                  y_mid_next])
                    # dbg_plt(boxes[-1], "B column %d box" % (column + 1))
    if right2left_readingorder:
        peaks_neg_tot_tables_new = []