def pairwise(iterable):
    """Yield successive overlapping pairs of items from *iterable*.

    pairwise('ABCDEFG') → AB BC CD DE EF FG

    An input with fewer than two items yields nothing.  Any iterable is
    accepted (sequence, array, one-shot iterator); it is consumed lazily,
    one item at a time.

    Args:
        iterable: the items to pair up.

    Yields:
        2-tuples ``(previous, current)`` for each adjacent pair of items.
    """
    # Prefer the standard-library implementation (Python 3.10+).  The import
    # is local so that the module still loads on older interpreters.
    try:
        from itertools import pairwise as _stdlib_pairwise
    except ImportError:
        _stdlib_pairwise = None

    if _stdlib_pairwise is not None:
        yield from _stdlib_pairwise(iterable)
        return

    # Fallback for Python < 3.10: remember the previous item and pair it
    # with each following one.
    iterator = iter(iterable)
    previous = next(iterator, None)  # default is never yielded if input is empty
    for current in iterator:
        yield previous, current
        previous = current
peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) sigma_gaus = 8 - z = gaussian_filter1d(y_padded, sigma_gaus) - zneg_rev = -y_padded + np.max(y_padded) + #z = gaussian_filter1d(y_padded, sigma_gaus) + #peaks, _ = find_peaks(z, height=0) + #peaks = peaks - 20 + zneg_rev = np.max(y_padded) - y_padded zneg = np.zeros(len(zneg_rev) + 40) zneg[20 : len(zneg_rev) + 20] = zneg_rev zneg = gaussian_filter1d(zneg, sigma_gaus) - peaks, _ = find_peaks(z, height=0) peaks_neg, _ = find_peaks(zneg, height=0) peaks_neg = peaks_neg - 20 - 20 - peaks = peaks - 20 ##plt.plot(z) ##plt.show() - if contours_main != None: - areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) + cx_main, cy_main = find_center_of_contours(contours_main) + cx_head, cy_head = find_center_of_contours(contours_head) - y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) + peaks_neg_new = np.append(np.insert(peaks_neg, 0, 0), textline_mask.shape[0]) + # offset from bbox of mask + peaks_neg_new += y_ref - if len(contours_header) != None: - areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))]) - M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))] - cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))] - cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) 
for j in range(len(M_header))] - x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) - x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) - - y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))]) - y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))]) - # print(cy_main,'mainy') - - peaks_neg_new = [] - peaks_neg_new.append(0 + y_ref) - for iii in range(len(peaks_neg)): - peaks_neg_new.append(peaks_neg[iii] + y_ref) - peaks_neg_new.append(textline_mask.shape[0] + y_ref) - - if len(cy_main) > 0 and np.max(cy_main) > np.max(peaks_neg_new): - cy_main = np.array(cy_main) * (np.max(peaks_neg_new) / np.max(cy_main)) - 10 - if contours_main != None: - indexer_main = np.arange(len(contours_main)) - if contours_main != None: - len_main = len(contours_main) - else: - len_main = 0 - - matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5)) - matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header)) + matrix_of_orders = np.zeros((len(contours_main) + len(contours_head), 5), dtype=int) + matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_head)) matrix_of_orders[: len(contours_main), 1] = 1 matrix_of_orders[len(contours_main) :, 1] = 2 matrix_of_orders[: len(contours_main), 2] = cx_main - matrix_of_orders[len(contours_main) :, 2] = cx_header + matrix_of_orders[len(contours_main) :, 2] = cx_head matrix_of_orders[: len(contours_main), 3] = cy_main - matrix_of_orders[len(contours_main) :, 3] = cy_header + matrix_of_orders[len(contours_main) :, 3] = cy_head matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main)) - matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header)) + matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_head)) # print(peaks_neg_new,'peaks_neg_new') # 
print(matrix_of_orders,'matrix_of_orders') @@ -1290,27 +1240,14 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): final_indexers_sorted = [] final_types = [] final_index_type = [] - for i in range(len(peaks_neg_new) - 1): - top = peaks_neg_new[i] - down = peaks_neg_new[i + 1] - indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & - ((matrix_of_orders[:, 3] < down))] - cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & - ((matrix_of_orders[:, 3] < down))] - cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & - ((matrix_of_orders[:, 3] < down))] - types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] - index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] + for top, bot in pairwise(peaks_neg_new): + indexes_in, types_in, cxs_in, cys_in, typed_indexes_in = \ + matrix_of_orders[(matrix_of_orders[:, 3] >= top) & + (matrix_of_orders[:, 3] < bot)].T sorted_inside = np.argsort(cxs_in) - ind_in_int = indexes_in[sorted_inside] - ind_in_type = types_of_text[sorted_inside] - ind_ind_type = index_types_of_text[sorted_inside] - for j in range(len(ind_in_int)): - final_indexers_sorted.append(int(ind_in_int[j])) - final_types.append(int(ind_in_type[j])) - final_index_type.append(int(ind_ind_type[j])) + final_indexers_sorted.extend(indexes_in[sorted_inside]) + final_types.extend(types_in[sorted_inside]) + final_index_type.extend(typed_indexes_in[sorted_inside]) ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:]