mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-05-01 03:32:00 +02:00
get_marginals(): improve contour assignment…
- use the undeskewed mask for contour comparisons instead of the deskewed mask (which is less precise)
- rename `text_with_lines` → `text_mask_d`
- rename `mask_marginals` → `main_mask_d`
- rename `text_regions` → `early_layout`
- rename `...textline...` → `...text...`
This commit is contained in:
parent
6d55d0b87b
commit
68ceeec764
1 changed file with 77 additions and 89 deletions
|
|
@ -2,62 +2,63 @@ import numpy as np
|
||||||
import cv2
|
import cv2
|
||||||
from scipy.signal import find_peaks
|
from scipy.signal import find_peaks
|
||||||
from scipy.ndimage import gaussian_filter1d
|
from scipy.ndimage import gaussian_filter1d
|
||||||
from .contour import find_new_features_of_contours, return_contours_of_interested_region
|
from .contour import find_center_of_contours, return_contours_of_interested_region
|
||||||
from .resize import resize_image
|
from .resize import resize_image
|
||||||
from .rotate import rotate_image
|
from .rotate import rotate_image
|
||||||
|
|
||||||
def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None):
|
def get_marginals(text_mask_d, early_layout, num_col, slope_deskew, kernel=None,
|
||||||
# rs: text_with_lines should be called text_mask_d
|
label_text=1,
|
||||||
# rs: text_regions should be called early_layout (contains other classes, too)
|
label_marg=4,
|
||||||
# rs: text_with_lines is already deskewed, while text_regions is not...
|
):
|
||||||
mask_marginals = np.zeros_like(text_with_lines)
|
# rs: text_mask_d is already deskewed, while early_layout is not...
|
||||||
height, width = mask_marginals.shape
|
main_mask_d = np.zeros_like(text_mask_d)
|
||||||
|
height, width = main_mask_d.shape
|
||||||
|
|
||||||
##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
|
##text_mask_d=cv2.erode(text_mask_d,self.kernel,iterations=3)
|
||||||
text_with_lines_eroded = cv2.erode(text_with_lines,kernel,iterations=5)
|
text_mask_d_eroded = cv2.erode(text_mask_d,kernel,iterations=5)
|
||||||
|
|
||||||
if height <= 1500:
|
if height <= 1500:
|
||||||
pass
|
pass
|
||||||
elif 1500 < height <= 1800:
|
elif 1500 < height <= 1800:
|
||||||
# rs: why not / 1.5???
|
# rs: why not / 1.5???
|
||||||
text_with_lines = resize_image(text_with_lines, int(height * 1.5), width)
|
text_mask_d = resize_image(text_mask_d, int(height * 1.5), width)
|
||||||
text_with_lines = cv2.erode(text_with_lines, kernel, iterations=5)
|
text_mask_d = cv2.erode(text_mask_d, kernel, iterations=5)
|
||||||
# rs: and back to original size
|
# rs: and back to original size
|
||||||
text_with_lines = resize_image(text_with_lines, height, width)
|
text_mask_d = resize_image(text_mask_d, height, width)
|
||||||
else:
|
else:
|
||||||
# rs: why not / 1.8???
|
# rs: why not / 1.8???
|
||||||
text_with_lines = resize_image(text_with_lines, int(height * 1.8), width)
|
text_mask_d = resize_image(text_mask_d, int(height * 1.8), width)
|
||||||
text_with_lines = cv2.erode(text_with_lines, kernel, iterations=7)
|
text_mask_d = cv2.erode(text_mask_d, kernel, iterations=7)
|
||||||
# rs: and back to original size
|
# rs: and back to original size
|
||||||
text_with_lines = resize_image(text_with_lines, height, width)
|
text_mask_d = resize_image(text_mask_d, height, width)
|
||||||
|
|
||||||
kernel_hor = np.ones((1, 5), dtype=np.uint8)
|
kernel_hor = np.ones((1, 5), dtype=np.uint8)
|
||||||
text_with_lines = cv2.erode(text_with_lines, kernel_hor, iterations=6)
|
text_mask_d = cv2.erode(text_mask_d, kernel_hor, iterations=6)
|
||||||
text_with_lines_y = text_with_lines.sum(axis=0)
|
text_mask_d_y = text_mask_d.sum(axis=0)
|
||||||
text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0)
|
text_mask_d_y_eroded = text_mask_d_eroded.sum(axis=0)
|
||||||
|
|
||||||
max_textline_thickness_percent = 100. * text_with_lines_y_eroded.max() / height
|
max_text_thickness_percent = 100. * text_mask_d_y_eroded.max() / height
|
||||||
min_textline_thickness = max_textline_thickness_percent / 100. * height / 20.
|
min_text_thickness = max_text_thickness_percent / 100. * height / 20.
|
||||||
|
|
||||||
# plt.figure()
|
# plt.figure()
|
||||||
# ax1 = plt.subplot(2, 1, 1, title="text_with_lines_eroded")
|
# ax1 = plt.subplot(2, 1, 1, title="text_mask_d_eroded")
|
||||||
# ax1.imshow(text_with_lines_eroded, aspect='auto')
|
# ax1.imshow(text_mask_d_eroded, aspect='auto')
|
||||||
# ax2 = plt.subplot(2, 1, 2, title="text_with_lines_y_eroded", sharex=ax1)
|
# ax2 = plt.subplot(2, 1, 2, title="text_mask_d_y_eroded", sharex=ax1)
|
||||||
# ax2.plot(list(range(width)), text_with_lines_y_eroded)
|
# ax2.plot(list(range(width)), text_mask_d_y_eroded)
|
||||||
# ax2.hlines(int(0.14 * height), 0, width,
|
# ax2.hlines(int(0.14 * height), 0, width,
|
||||||
# label='max_textline_thickness=14%', colors='r')
|
# label='max_text_thickness=14%', colors='r')
|
||||||
# ax2.hlines([min_textline_thickness], 0, width,
|
# ax2.hlines([min_text_thickness], 0, width,
|
||||||
# label='min_textline_thickness', colors='g')
|
# label='min_text_thickness', colors='g')
|
||||||
# ax2.scatter([np.argmax(text_with_lines_y_eroded)],
|
# ax2.scatter([np.argmax(text_mask_d_y_eroded)],
|
||||||
# [text_with_lines_y_eroded.max()], color='r',
|
# [text_mask_d_y_eroded.max()], color='r',
|
||||||
# label='max = %d%%' % max_textline_thickness_percent)
|
# label='max = %d%%' % max_text_thickness_percent)
|
||||||
# plt.legend()
|
# plt.legend()
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
|
||||||
if max_textline_thickness_percent >= 14:
|
if max_text_thickness_percent >= 14:
|
||||||
text_with_lines_y_rev = np.max(text_with_lines_y) - text_with_lines_y
|
text_mask_d_y_rev = np.max(text_mask_d_y) - text_mask_d_y
|
||||||
|
|
||||||
region_sum_0 = gaussian_filter1d(text_with_lines_y, 1)
|
region_sum_0 = gaussian_filter1d(text_mask_d_y, 1)
|
||||||
first_nonzero = region_sum_0.nonzero()[0][0] # outer left
|
first_nonzero = region_sum_0.nonzero()[0][0] # outer left
|
||||||
last_nonzero = region_sum_0.nonzero()[0][-1] # outer right
|
last_nonzero = region_sum_0.nonzero()[0][-1] # outer right
|
||||||
mid_point = 0.5 * (last_nonzero + first_nonzero)
|
mid_point = 0.5 * (last_nonzero + first_nonzero)
|
||||||
|
|
@ -65,14 +66,14 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
one_third_left = (mid_point - first_nonzero) / 3.0
|
one_third_left = (mid_point - first_nonzero) / 3.0
|
||||||
|
|
||||||
# rs: constrain the distance at least 2 characters at 12pt, retrieve height and prominence
|
# rs: constrain the distance at least 2 characters at 12pt, retrieve height and prominence
|
||||||
peaks, props = find_peaks(text_with_lines_y_rev, height=0, prominence=0, distance=30)
|
peaks, props = find_peaks(text_mask_d_y_rev, height=0, prominence=0, distance=30)
|
||||||
peaks_orig = np.copy(peaks)
|
peaks_orig = np.copy(peaks)
|
||||||
# rs: also calculate the product of prominence and height (for final selection)
|
# rs: also calculate the product of prominence and height (for final selection)
|
||||||
scores = np.zeros(peaks.max() + 1)
|
scores = np.zeros(peaks.max() + 1)
|
||||||
scores[peaks] = props['prominences'] * props['peak_heights']
|
scores[peaks] = props['prominences'] * props['peak_heights']
|
||||||
|
|
||||||
peaks = peaks[(peaks > first_nonzero) & (peaks < last_nonzero)]
|
peaks = peaks[(peaks > first_nonzero) & (peaks < last_nonzero)]
|
||||||
peaks = peaks[region_sum_0[peaks] < min_textline_thickness]
|
peaks = peaks[region_sum_0[peaks] < min_text_thickness]
|
||||||
|
|
||||||
if num_col == 1:
|
if num_col == 1:
|
||||||
peaks_right = peaks[peaks > mid_point]
|
peaks_right = peaks[peaks > mid_point]
|
||||||
|
|
@ -84,20 +85,20 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
if len(peaks_left) == 0:
|
if len(peaks_left) == 0:
|
||||||
if len(peaks_right) == 0:
|
if len(peaks_right) == 0:
|
||||||
# plt.figure()
|
# plt.figure()
|
||||||
# ax1 = plt.subplot(2, 1, 1, title='text_with_lines (deskewed text+sep mask)')
|
# ax1 = plt.subplot(2, 1, 1, title='text_mask_d (deskewed text+sep mask)')
|
||||||
# ax1.imshow(text_with_lines, aspect='auto')
|
# ax1.imshow(text_mask_d, aspect='auto')
|
||||||
# ax1.vlines([first_nonzero], 0, height, label='first_nonzero', colors='r')
|
# ax1.vlines([first_nonzero], 0, height, label='first_nonzero', colors='r')
|
||||||
# ax1.vlines([last_nonzero], 0, height, label='last_nonzero', colors='r')
|
# ax1.vlines([last_nonzero], 0, height, label='last_nonzero', colors='r')
|
||||||
# ax1.vlines(peaks_left, 0, height, label='peaks_left', colors='orange')
|
# ax1.vlines(peaks_left, 0, height, label='peaks_left', colors='orange')
|
||||||
# ax1.vlines(peaks_right, 0, height, label='peaks_right', colors='orange')
|
# ax1.vlines(peaks_right, 0, height, label='peaks_right', colors='orange')
|
||||||
# ax2 = plt.subplot(2, 1, 2, title='text_with_lines_y (smoothed)', sharex=ax1)
|
# ax2 = plt.subplot(2, 1, 2, title='text_mask_d_y (smoothed)', sharex=ax1)
|
||||||
# ax2.plot(list(range(width)), region_sum_0)
|
# ax2.plot(list(range(width)), region_sum_0)
|
||||||
# ax2.hlines(min_textline_thickness, 0, width, colors='g',
|
# ax2.hlines(min_text_thickness, 0, width, colors='g',
|
||||||
# label='min_textline_thickness=%d' % min_textline_thickness)
|
# label='min_text_thickness=%d' % min_text_thickness)
|
||||||
# ax2.scatter(peaks_orig, region_sum_0[peaks_orig], label='peaks')
|
# ax2.scatter(peaks_orig, region_sum_0[peaks_orig], label='peaks')
|
||||||
# plt.legend()
|
# plt.legend()
|
||||||
# plt.show()
|
# plt.show()
|
||||||
return text_regions
|
return early_layout
|
||||||
point_right = peaks_right[np.argmax(scores[peaks_right])]
|
point_right = peaks_right[np.argmax(scores[peaks_right])]
|
||||||
#point_left = first_nonzero
|
#point_left = first_nonzero
|
||||||
point_left = 0
|
point_left = 0
|
||||||
|
|
@ -113,75 +114,62 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
point_left = peaks_left[np.argmax(scores[peaks_left])]
|
point_left = peaks_left[np.argmax(scores[peaks_left])]
|
||||||
#point_right = last_nonzero
|
#point_right = last_nonzero
|
||||||
|
|
||||||
# rs: should be called mask_main (i.e. inverted semantics here)
|
main_mask_d[:, point_left: point_right] = 1
|
||||||
mask_marginals[:, point_left: point_right] = 1
|
|
||||||
|
|
||||||
# plt.figure()
|
# plt.figure()
|
||||||
# ax1 = plt.subplot(2, 2, 1)
|
# ax1 = plt.subplot(2, 2, 1)
|
||||||
# ax1.title.set_text('text_with_lines (deskewed text+sep mask)')
|
# ax1.title.set_text('text_mask_d (deskewed text+table mask)')
|
||||||
# ax1.imshow(text_with_lines)
|
# ax1.imshow(text_mask_d)
|
||||||
# ax1.vlines(peaks_left, 0, height, label='peaks_left', colors='b')
|
# ax1.vlines(peaks_left, 0, height, label='peaks_left', colors='b')
|
||||||
# ax1.vlines(peaks_right, 0, height, label='peaks_right', colors='b')
|
# ax1.vlines(peaks_right, 0, height, label='peaks_right', colors='b')
|
||||||
# ax1.vlines([first_nonzero], 0, height, label='first_nonzero', colors='g')
|
# ax1.vlines([first_nonzero], 0, height, label='first_nonzero', colors='g')
|
||||||
# ax1.vlines([last_nonzero], 0, height, label='last_nonzero', colors='g')
|
# ax1.vlines([last_nonzero], 0, height, label='last_nonzero', colors='g')
|
||||||
# ax1.vlines([point_left], 0, height, label='point_left', colors='r')
|
# ax1.vlines([point_left], 0, height, label='point_left', colors='r')
|
||||||
# ax1.vlines([point_right], 0, height, label='point_right', colors='r')
|
# ax1.vlines([point_right], 0, height, label='point_right', colors='r')
|
||||||
# ax2 = plt.subplot(2, 2, 2, title='mask_marginals (deskewed marginal mask)', sharey=ax1)
|
# ax2 = plt.subplot(2, 2, 2, title='main_mask_d (deskewed main mask)', sharey=ax1)
|
||||||
# ax2.imshow(mask_marginals)
|
# ax2.imshow(main_mask_d)
|
||||||
# ax3 = plt.subplot(2, 2, 3, title='text_with_lines_y (projection for minima)', sharex=ax1)
|
# ax3 = plt.subplot(2, 2, 3, title='text_mask_d_y (projection for minima)', sharex=ax1)
|
||||||
# ax3.plot(list(range(width)), text_with_lines_y)
|
# ax3.plot(list(range(width)), text_mask_d_y)
|
||||||
# ax3.set_aspect('auto')
|
# ax3.set_aspect('auto')
|
||||||
# ax4 = plt.subplot(2, 2, 4, title='text_regions (undeskewed labels)')
|
# ax4 = plt.subplot(2, 2, 4, title='early_layout (undeskewed labels)')
|
||||||
# ax4.imshow(text_regions)
|
# ax4.imshow(early_layout)
|
||||||
# plt.legend()
|
# plt.legend()
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
|
||||||
# rs: rotate back (into undeskewed/original shape as text_regions input):
|
# rs: rotate back (into undeskewed/original shape as early_layout input):
|
||||||
mask_marginals_rotated = rotate_image(mask_marginals, -slope_deskew)
|
main_mask = rotate_image(main_mask_d, -slope_deskew)
|
||||||
mask_marginals_rotated_y = mask_marginals_rotated.sum(axis=0)
|
main_mask_y = main_mask.sum(axis=0)
|
||||||
mask_marginals_rotated_y_nz = np.flatnonzero(mask_marginals_rotated_y)
|
main_mask_y_nz = np.flatnonzero(main_mask_y)
|
||||||
min_point_of_left_marginal = max(0, np.min(mask_marginals_rotated_y_nz) - 16)
|
|
||||||
max_point_of_right_marginal = min(width - 1, np.max(mask_marginals_rotated_y_nz) + 16)
|
|
||||||
|
|
||||||
min_area_text = 0.00001
|
min_area_text = 0.00001
|
||||||
# rs: why not extract from mask_marginals_rotated???
|
main_contour = return_contours_of_interested_region(main_mask, 1, min_area_text)[0]
|
||||||
# rs: why not largest area instead of first?
|
text_contours = return_contours_of_interested_region(early_layout, label_text, min_area_text)
|
||||||
polygon_mask_marginals_rotated = return_contours_of_interested_region(mask_marginals, 1, min_area_text)[0]
|
cx_text, cy_text = find_center_of_contours(text_contours)
|
||||||
polygons_of_marginals = return_contours_of_interested_region(text_regions, 1, min_area_text)
|
|
||||||
|
|
||||||
(cx_text_only,
|
marg_contours = []
|
||||||
cy_text_only,
|
for i, contour in enumerate(text_contours):
|
||||||
x_min_text_only,
|
if -1 == cv2.pointPolygonTest(main_contour,
|
||||||
x_max_text_only,
|
(cx_text[i],
|
||||||
y_min_text_only,
|
cy_text[i]),
|
||||||
y_max_text_only,
|
|
||||||
y_cor_x_min_main) = find_new_features_of_contours(polygons_of_marginals)
|
|
||||||
|
|
||||||
main_text_should_be_marginals = []
|
|
||||||
x_min_marginals_left=[]
|
|
||||||
x_min_marginals_right=[]
|
|
||||||
|
|
||||||
for i, polygon in enumerate(polygons_of_marginals):
|
|
||||||
if -1 == cv2.pointPolygonTest(polygon_mask_marginals_rotated,
|
|
||||||
(cx_text_only[i],
|
|
||||||
cy_text_only[i]),
|
|
||||||
False):
|
False):
|
||||||
main_text_should_be_marginals.append(polygon)
|
marg_contours.append(contour)
|
||||||
|
|
||||||
|
early_layout_orig = np.copy(early_layout)
|
||||||
|
early_layout = cv2.fillPoly(early_layout, pts=marg_contours, color=label_marg)
|
||||||
|
|
||||||
text_regions = cv2.fillPoly(text_regions, pts=main_text_should_be_marginals, color=4)
|
|
||||||
# plt.figure()
|
# plt.figure()
|
||||||
# ax1 = plt.subplot(2, 2, 1, title='mask_marginals (deskewed marginal mask)')
|
# ax1 = plt.subplot(2, 2, 1, title='main_mask_d (deskewed main mask)')
|
||||||
# plt.imshow(mask_marginals)
|
# plt.imshow(main_mask_d)
|
||||||
# ax2 = plt.subplot(2, 2, 2, title='mask_marginals_rotated (undeskewed marginal mask)')
|
# ax2 = plt.subplot(2, 2, 2, title='main_mask (undeskewed main mask)')
|
||||||
# plt.imshow(mask_marginals_rotated)
|
# plt.imshow(main_mask)
|
||||||
# ax4 = plt.subplot(2, 2, 4, title='text_regions (undeskewed labels split)')
|
# ax3 = plt.subplot(2, 2, 3, title='early_layout (undeskewed labels original)')
|
||||||
# plt.imshow(text_regions)
|
# plt.imshow(early_layout_orig)
|
||||||
|
# ax4 = plt.subplot(2, 2, 4, title='early_layout (undeskewed labels split)')
|
||||||
|
# plt.imshow(early_layout)
|
||||||
|
# plt.show()
|
||||||
|
#plt.imshow(early_layout)
|
||||||
#plt.show()
|
#plt.show()
|
||||||
|
|
||||||
#plt.imshow(text_regions)
|
|
||||||
#plt.show()
|
|
||||||
|
|
||||||
#sys.exit()
|
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
return text_regions
|
return early_layout
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue