diff --git a/src/eynollah/utils/marginals.py b/src/eynollah/utils/marginals.py index 8344b9a..5353ca7 100644 --- a/src/eynollah/utils/marginals.py +++ b/src/eynollah/utils/marginals.py @@ -2,62 +2,63 @@ import numpy as np import cv2 from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d -from .contour import find_new_features_of_contours, return_contours_of_interested_region +from .contour import find_center_of_contours, return_contours_of_interested_region from .resize import resize_image from .rotate import rotate_image -def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None): - # rs: text_with_lines should be called text_mask_d - # rs: text_regions should be called early_layout (contains other classes, too) - # rs: text_with_lines is already deskewed, while text_regions is not... - mask_marginals = np.zeros_like(text_with_lines) - height, width = mask_marginals.shape +def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None, + label_text=1, + label_marg=4, +): + # rs: text_mask_d is already deskewed, while early_layout is not... + main_mask_d = np.zeros_like(text_mask_d) + height, width = main_mask_d.shape - ##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3) - text_with_lines_eroded = cv2.erode(text_with_lines,kernel,iterations=5) + ##text_mask_d=cv2.erode(text_mask_d,self.kernel,iterations=3) + text_mask_d_eroded = cv2.erode(text_mask_d,kernel,iterations=5) if height <= 1500: pass elif 1500 < height <= 1800: # rs: why not / 1.5??? - text_with_lines = resize_image(text_with_lines, int(height * 1.5), width) - text_with_lines = cv2.erode(text_with_lines, kernel, iterations=5) + text_mask_d = resize_image(text_mask_d, int(height * 1.5), width) + text_mask_d = cv2.erode(text_mask_d, kernel, iterations=5) # rs: and back to original size - text_with_lines = resize_image(text_with_lines, height, width) + text_mask_d = resize_image(text_mask_d, height, width) else: # rs: why not / 1.8??? - text_with_lines = resize_image(text_with_lines, int(height * 1.8), width) - text_with_lines = cv2.erode(text_with_lines, kernel, iterations=7) + text_mask_d = resize_image(text_mask_d, int(height * 1.8), width) + text_mask_d = cv2.erode(text_mask_d, kernel, iterations=7) # rs: and back to original size - text_with_lines = resize_image(text_with_lines, height, width) + text_mask_d = resize_image(text_mask_d, height, width) kernel_hor = np.ones((1, 5), dtype=np.uint8) - text_with_lines = cv2.erode(text_with_lines, kernel_hor, iterations=6) - text_with_lines_y = text_with_lines.sum(axis=0) - text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0) + text_mask_d = cv2.erode(text_mask_d, kernel_hor, iterations=6) + text_mask_d_y = text_mask_d.sum(axis=0) + text_mask_d_y_eroded = text_mask_d_eroded.sum(axis=0) - max_textline_thickness_percent = 100. * text_with_lines_y_eroded.max() / height - min_textline_thickness = max_textline_thickness_percent / 100. * height / 20. + max_text_thickness_percent = 100. * text_mask_d_y_eroded.max() / height + min_text_thickness = max_text_thickness_percent / 100. * height / 20. # plt.figure() - # ax1 = plt.subplot(2, 1, 1, title="text_with_lines_eroded") - # ax1.imshow(text_with_lines_eroded, aspect='auto') - # ax2 = plt.subplot(2, 1, 2, title="text_with_lines_y_eroded", sharex=ax1) - # ax2.plot(list(range(width)), text_with_lines_y_eroded) + # ax1 = plt.subplot(2, 1, 1, title="text_mask_d_eroded") + # ax1.imshow(text_mask_d_eroded, aspect='auto') + # ax2 = plt.subplot(2, 1, 2, title="text_mask_d_y_eroded", sharex=ax1) + # ax2.plot(list(range(width)), text_mask_d_y_eroded) # ax2.hlines(int(0.14 * height), 0, width, - # label='max_textline_thickness=14%', colors='r') - # ax2.hlines([min_textline_thickness], 0, width, - # label='min_textline_thickness', colors='g') - # ax2.scatter([np.argmax(text_with_lines_y_eroded)], - # [text_with_lines_y_eroded.max()], color='r', - # label='max = %d%%' % max_textline_thickness_percent) + # label='max_text_thickness=14%', colors='r') + # ax2.hlines([min_text_thickness], 0, width, + # label='min_text_thickness', colors='g') + # ax2.scatter([np.argmax(text_mask_d_y_eroded)], + # [text_mask_d_y_eroded.max()], color='r', + # label='max = %d%%' % max_text_thickness_percent) # plt.legend() # plt.show() - if max_textline_thickness_percent >= 14: - text_with_lines_y_rev = np.max(text_with_lines_y) - text_with_lines_y + if max_text_thickness_percent >= 14: + text_mask_d_y_rev = np.max(text_mask_d_y) - text_mask_d_y - region_sum_0 = gaussian_filter1d(text_with_lines_y, 1) + region_sum_0 = gaussian_filter1d(text_mask_d_y, 1) first_nonzero = region_sum_0.nonzero()[0][0] # outer left last_nonzero = region_sum_0.nonzero()[0][-1] # outer right mid_point = 0.5 * (last_nonzero + first_nonzero) @@ -65,14 +66,14 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N one_third_left = (mid_point - first_nonzero) / 3.0 # rs: constrain the distance at least 2 characters at 12pt, retrieve height and prominence - peaks, props = find_peaks(text_with_lines_y_rev, height=0, prominence=0, distance=30) + peaks, props = find_peaks(text_mask_d_y_rev, height=0, prominence=0, distance=30) peaks_orig = np.copy(peaks) # rs: also calculate the product of prominence and height (for final selection) scores = np.zeros(peaks.max() + 1) scores[peaks] = props['prominences'] * props['peak_heights'] peaks = peaks[(peaks > first_nonzero) & (peaks < last_nonzero)] - peaks = peaks[region_sum_0[peaks] < min_textline_thickness] + peaks = peaks[region_sum_0[peaks] < min_text_thickness] if num_col == 1: peaks_right = peaks[peaks > mid_point] @@ -84,20 +85,20 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N if len(peaks_left) == 0: if len(peaks_right) == 0: # plt.figure() - # ax1 = plt.subplot(2, 1, 1, title='text_with_lines (deskewed text+sep mask)') - # ax1.imshow(text_with_lines, aspect='auto') + # ax1 = plt.subplot(2, 1, 1, title='text_mask_d (deskewed text+sep mask)') + # ax1.imshow(text_mask_d, aspect='auto') # ax1.vlines([first_nonzero], 0, height, label='first_nonzero', colors='r') # ax1.vlines([last_nonzero], 0, height, label='last_nonzero', colors='r') # ax1.vlines(peaks_left, 0, height, label='peaks_left', colors='orange') # ax1.vlines(peaks_right, 0, height, label='peaks_right', colors='orange') - # ax2 = plt.subplot(2, 1, 2, title='text_with_lines_y (smoothed)', sharex=ax1) + # ax2 = plt.subplot(2, 1, 2, title='text_mask_d_y (smoothed)', sharex=ax1) # ax2.plot(list(range(width)), region_sum_0) - # ax2.hlines(min_textline_thickness, 0, width, colors='g', - # label='min_textline_thickness=%d' % min_textline_thickness) + # ax2.hlines(min_text_thickness, 0, width, colors='g', + # label='min_text_thickness=%d' % min_text_thickness) # ax2.scatter(peaks_orig, region_sum_0[peaks_orig], label='peaks') # plt.legend() # plt.show() - return text_regions + return early_layout point_right = peaks_right[np.argmax(scores[peaks_right])] #point_left = first_nonzero point_left = 0 @@ -113,75 +114,62 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N point_left = peaks_left[np.argmax(scores[peaks_left])] #point_right = last_nonzero - # rs: should be called mask_main (i.e. inverted semantics here) - mask_marginals[:, point_left: point_right] = 1 + main_mask_d[:, point_left: point_right] = 1 # plt.figure() # ax1 = plt.subplot(2, 2, 1) - # ax1.title.set_text('text_with_lines (deskewed text+sep mask)') - # ax1.imshow(text_with_lines) + # ax1.title.set_text('text_mask_d (deskewed text+table mask)') + # ax1.imshow(text_mask_d) # ax1.vlines(peaks_left, 0, height, label='peaks_left', colors='b') # ax1.vlines(peaks_right, 0, height, label='peaks_right', colors='b') # ax1.vlines([first_nonzero], 0, height, label='first_nonzero', colors='g') # ax1.vlines([last_nonzero], 0, height, label='last_nonzero', colors='g') # ax1.vlines([point_left], 0, height, label='point_left', colors='r') # ax1.vlines([point_right], 0, height, label='point_right', colors='r') - # ax2 = plt.subplot(2, 2, 2, title='mask_marginals (deskewed marginal mask)', sharey=ax1) - # ax2.imshow(mask_marginals) - # ax3 = plt.subplot(2, 2, 3, title='text_with_lines_y (projection for minima)', sharex=ax1) - # ax3.plot(list(range(width)), text_with_lines_y) + # ax2 = plt.subplot(2, 2, 2, title='main_mask_d (deskewed main mask)', sharey=ax1) + # ax2.imshow(main_mask_d) + # ax3 = plt.subplot(2, 2, 3, title='text_mask_d_y (projection for minima)', sharex=ax1) + # ax3.plot(list(range(width)), text_mask_d_y) # ax3.set_aspect('auto') - # ax4 = plt.subplot(2, 2, 4, title='text_regions (undeskewed labels)') - # ax4.imshow(text_regions) + # ax4 = plt.subplot(2, 2, 4, title='early_layout (undeskewed labels)') + # ax4.imshow(early_layout) # plt.legend() # plt.show() - # rs: rotate back (into undeskewed/original shape as text_regions input): - mask_marginals_rotated = rotate_image(mask_marginals, -slope_deskew) - mask_marginals_rotated_y = mask_marginals_rotated.sum(axis=0) - mask_marginals_rotated_y_nz = np.flatnonzero(mask_marginals_rotated_y) - min_point_of_left_marginal = max(0, np.min(mask_marginals_rotated_y_nz) - 16) - max_point_of_right_marginal = min(width - 1, np.max(mask_marginals_rotated_y_nz) + 16) + # rs: rotate back (into undeskewed/original shape as early_layout input): + main_mask = rotate_image(main_mask_d, -slope_deskew) + main_mask_y = main_mask.sum(axis=0) + main_mask_y_nz = np.flatnonzero(main_mask_y) min_area_text = 0.00001 - # rs: why not extract from mask_marginals_rotated??? - # rs: why not largest area instead of first? - polygon_mask_marginals_rotated = return_contours_of_interested_region(mask_marginals, 1, min_area_text)[0] - polygons_of_marginals = return_contours_of_interested_region(text_regions, 1, min_area_text) + main_contour = return_contours_of_interested_region(main_mask, 1, min_area_text)[0] + text_contours = return_contours_of_interested_region(early_layout, label_text, min_area_text) + cx_text, cy_text = find_center_of_contours(text_contours) - (cx_text_only, - cy_text_only, - x_min_text_only, - x_max_text_only, - y_min_text_only, - y_max_text_only, - y_cor_x_min_main) = find_new_features_of_contours(polygons_of_marginals) - - main_text_should_be_marginals = [] - x_min_marginals_left=[] - x_min_marginals_right=[] - - for i, polygon in enumerate(polygons_of_marginals): - if -1 == cv2.pointPolygonTest(polygon_mask_marginals_rotated, - (cx_text_only[i], - cy_text_only[i]), + marg_contours = [] + for i, contour in enumerate(text_contours): + if -1 == cv2.pointPolygonTest(main_contour, + (cx_text[i], + cy_text[i]), False): - main_text_should_be_marginals.append(polygon) + marg_contours.append(contour) + + early_layout_orig = np.copy(early_layout) + early_layout = cv2.fillPoly(early_layout, pts=marg_contours, color=label_marg) - text_regions = cv2.fillPoly(text_regions, pts=main_text_should_be_marginals, color=4) # plt.figure() - # ax1 = plt.subplot(2, 2, 1, title='mask_marginals (deskewed marginal mask)') - # plt.imshow(mask_marginals) - # ax2 = plt.subplot(2, 2, 2, title='mask_marginals_rotated (undeskewed marginal mask)') - # plt.imshow(mask_marginals_rotated) - # ax4 = plt.subplot(2, 2, 4, title='text_regions (undeskewed labels split)') - # plt.imshow(text_regions) + # ax1 = plt.subplot(2, 2, 1, title='main_mask_d (deskewed main mask)') + # plt.imshow(main_mask_d) + # ax2 = plt.subplot(2, 2, 2, title='main_mask (undeskewed main mask)') + # plt.imshow(main_mask) + # ax3 = plt.subplot(2, 2, 3, title='early_layout (undeskewed labels original)') + # plt.imshow(early_layout_orig) + # ax4 = plt.subplot(2, 2, 4, title='early_layout (undeskewed labels split)') + # plt.imshow(early_layout) # plt.show() - - #plt.imshow(text_regions) + #plt.imshow(early_layout) #plt.show() - #sys.exit() else: pass - return text_regions + return early_layout