mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-26 23:34:13 +01:00
find_number_of_columns_in_document: improve splitter rule
Extend horizontal separators to the full image width if they do not overlap any other regions. This affects only the returned `splitter_y` result; the returned separators mask is left unchanged.
This commit is contained in:
parent
542d38ab43
commit
5a0e4c3b0f
1 changed file with 11 additions and 0 deletions
|
|
@ -1378,6 +1378,8 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point):
|
||||||
return peaks_neg_tot
|
return peaks_neg_tot
|
||||||
|
|
||||||
def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, label_seps, contours_h=None):
|
def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, label_seps, contours_h=None):
|
||||||
|
ncomps, ccomps = cv2.connectedComponents(region_pre_p.astype(np.uint8))
|
||||||
|
|
||||||
separators_closeup = 1 * (region_pre_p == label_seps)
|
separators_closeup = 1 * (region_pre_p == label_seps)
|
||||||
separators_closeup[0:110] = 0
|
separators_closeup[0:110] = 0
|
||||||
separators_closeup[-150:] = 0
|
separators_closeup[-150:] = 0
|
||||||
|
|
@ -1398,10 +1400,19 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
|
||||||
min_xe = cnt[:, 0, 0].min()
|
min_xe = cnt[:, 0, 0].min()
|
||||||
max_ye = cnt[:, 0, 1].max()
|
max_ye = cnt[:, 0, 1].max()
|
||||||
min_ye = cnt[:, 0, 1].min()
|
min_ye = cnt[:, 0, 1].min()
|
||||||
|
med_ye = int(np.median(cnt[:, 0, 1]))
|
||||||
dist_xe = max_xe - min_xe
|
dist_xe = max_xe - min_xe
|
||||||
dist_ye = max_ye - min_ye
|
dist_ye = max_ye - min_ye
|
||||||
if dist_ye <= 50 and dist_xe >= 3 * dist_ye:
|
if dist_ye <= 50 and dist_xe >= 3 * dist_ye:
|
||||||
cnts_hor_e.append(cnt)
|
cnts_hor_e.append(cnt)
|
||||||
|
labels = np.setdiff1d(np.unique(ccomps[med_ye]), [0])
|
||||||
|
if len(labels) == 1:
|
||||||
|
# mid line does not intersect with any other region
|
||||||
|
# so add it as extra splitter line
|
||||||
|
cnts_hor_e.append(np.array([[[0, med_ye]],
|
||||||
|
[[ccomps.shape[1], med_ye]],
|
||||||
|
[[ccomps.shape[1], med_ye + 1]],
|
||||||
|
[[0, med_ye + 1]]]))
|
||||||
|
|
||||||
# delete horizontal contours (leaving only the edges)
|
# delete horizontal contours (leaving only the edges)
|
||||||
separators_closeup_n_binary = cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=0)
|
separators_closeup_n_binary = cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=0)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue