Mirror of https://github.com/qurator-spk/eynollah.git (synced 2025-07-04 16:39:56 +02:00)

implent_law_head_main_not_parallel is unused
This commit is contained in:
parent d7d388671d
commit 133982380f
3 changed files with 94 additions and 94 deletions

@@ -1,5 +1,6 @@
 # pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring
 # pylint: disable=too-many-locals,wrong-import-position,too-many-lines
+# pylint: disable=too-many-public-methods
 """
 tool to extract table form data from alto xml data
 """

@@ -74,7 +75,6 @@ from .utils import (
     small_textlines_to_parent_adherence2,
     order_and_id_of_texts,
     order_of_regions,
-    implent_law_head_main_not_parallel,
     find_number_of_columns_in_document,
     return_boxes_of_images_by_order_of_reading_new,
 )

@@ -3252,3 +3252,96 @@ def return_hor_spliter_by_index(peaks_neg_fin_t, x_min_hor_some, x_max_hor_some)
             peaks_true.append(peaks_neg_fin_t[m])
     return indexer_lines, peaks_true, arg_min_hor_sort, indexer_lines_deletions_len, indexr_uniq_ind
+
+
+def implent_law_head_main_not_parallel(text_regions):
+    # print(text_regions.shape)
+    text_indexes = [1, 2]  # 1: main text , 2: header , 3: comments
+
+    for t_i in text_indexes:
+        textline_mask = text_regions[:, :] == t_i
+        textline_mask = textline_mask * 255.0
+
+        textline_mask = textline_mask.astype(np.uint8)
+        textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2)
+        kernel = np.ones((5, 5), np.uint8)
+
+        # print(type(textline_mask),np.unique(textline_mask),textline_mask.shape)
+        imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY)
+        ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+        if t_i == 1:
+            contours_main, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+            # print(type(contours_main))
+            areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
+            M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
+            cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
+            cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
+            x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
+            x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
+
+            y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
+            y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
+            # print(contours_main[0],np.shape(contours_main[0]),contours_main[0][:,0,0])
+        elif t_i == 2:
+            contours_header, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+            # print(type(contours_header))
+            areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))])
+            M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))]
+            cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
+            cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
+
+            x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
+            x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
+
+            y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
+            y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
+
+    args = np.array(range(1, len(cy_header) + 1))
+    args_main = np.array(range(1, len(cy_main) + 1))
+    for jj in range(len(contours_main)):
+        headers_in_main = [(cy_header > y_min_main[jj]) & ((cy_header < y_max_main[jj]))]
+        mains_in_main = [(cy_main > y_min_main[jj]) & ((cy_main < y_max_main[jj]))]
+        args_log = args * headers_in_main
+        res = args_log[args_log > 0]
+        res_true = res - 1
+
+        args_log_main = args_main * mains_in_main
+        res_main = args_log_main[args_log_main > 0]
+        res_true_main = res_main - 1
+
+        if len(res_true) > 0:
+            sum_header = np.sum(areas_header[res_true])
+            sum_main = np.sum(areas_main[res_true_main])
+            if sum_main > sum_header:
+                cnt_int = [contours_header[j] for j in res_true]
+                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(1, 1, 1))
+            else:
+                cnt_int = [contours_main[j] for j in res_true_main]
+                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(2, 2, 2))
+
+    for jj in range(len(contours_header)):
+        main_in_header = [(cy_main > y_min_header[jj]) & ((cy_main < y_max_header[jj]))]
+        header_in_header = [(cy_header > y_min_header[jj]) & ((cy_header < y_max_header[jj]))]
+        args_log = args_main * main_in_header
+        res = args_log[args_log > 0]
+        res_true = res - 1
+
+        args_log_header = args * header_in_header
+        res_header = args_log_header[args_log_header > 0]
+        res_true_header = res_header - 1
+
+        if len(res_true) > 0:
+
+            sum_header = np.sum(areas_header[res_true_header])
+            sum_main = np.sum(areas_main[res_true])
+
+            if sum_main > sum_header:
+
+                cnt_int = [contours_header[j] for j in res_true_header]
+                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(1, 1, 1))
+            else:
+                cnt_int = [contours_main[j] for j in res_true]
+                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(2, 2, 2))
+
+    return text_regions

@@ -1202,99 +1202,6 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
     return final_indexers_sorted, matrix_of_orders, final_types, final_index_type
 
-
-def implent_law_head_main_not_parallel(text_regions):
-    # print(text_regions.shape)
-    text_indexes = [1, 2]  # 1: main text , 2: header , 3: comments
-
-    for t_i in text_indexes:
-        textline_mask = text_regions[:, :] == t_i
-        textline_mask = textline_mask * 255.0
-
-        textline_mask = textline_mask.astype(np.uint8)
-        textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2)
-        kernel = np.ones((5, 5), np.uint8)
-
-        # print(type(textline_mask),np.unique(textline_mask),textline_mask.shape)
-        imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY)
-        ret, thresh = cv2.threshold(imgray, 0, 255, 0)
-
-        if t_i == 1:
-            contours_main, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-            # print(type(contours_main))
-            areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
-            M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
-            cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
-            cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
-            x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
-            x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
-
-            y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
-            y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
-            # print(contours_main[0],np.shape(contours_main[0]),contours_main[0][:,0,0])
-        elif t_i == 2:
-            contours_header, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
-            # print(type(contours_header))
-            areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))])
-            M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))]
-            cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
-            cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
-
-            x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
-            x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
-
-            y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
-            y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
-
-    args = np.array(range(1, len(cy_header) + 1))
-    args_main = np.array(range(1, len(cy_main) + 1))
-    for jj in range(len(contours_main)):
-        headers_in_main = [(cy_header > y_min_main[jj]) & ((cy_header < y_max_main[jj]))]
-        mains_in_main = [(cy_main > y_min_main[jj]) & ((cy_main < y_max_main[jj]))]
-        args_log = args * headers_in_main
-        res = args_log[args_log > 0]
-        res_true = res - 1
-
-        args_log_main = args_main * mains_in_main
-        res_main = args_log_main[args_log_main > 0]
-        res_true_main = res_main - 1
-
-        if len(res_true) > 0:
-            sum_header = np.sum(areas_header[res_true])
-            sum_main = np.sum(areas_main[res_true_main])
-            if sum_main > sum_header:
-                cnt_int = [contours_header[j] for j in res_true]
-                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(1, 1, 1))
-            else:
-                cnt_int = [contours_main[j] for j in res_true_main]
-                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(2, 2, 2))
-
-    for jj in range(len(contours_header)):
-        main_in_header = [(cy_main > y_min_header[jj]) & ((cy_main < y_max_header[jj]))]
-        header_in_header = [(cy_header > y_min_header[jj]) & ((cy_header < y_max_header[jj]))]
-        args_log = args_main * main_in_header
-        res = args_log[args_log > 0]
-        res_true = res - 1
-
-        args_log_header = args * header_in_header
-        res_header = args_log_header[args_log_header > 0]
-        res_true_header = res_header - 1
-
-        if len(res_true) > 0:
-
-            sum_header = np.sum(areas_header[res_true_header])
-            sum_main = np.sum(areas_main[res_true])
-
-            if sum_main > sum_header:
-
-                cnt_int = [contours_header[j] for j in res_true_header]
-                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(1, 1, 1))
-            else:
-                cnt_int = [contours_main[j] for j in res_true]
-                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(2, 2, 2))
-
-    return text_regions
-
-
 def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier):
     #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2)
     img_p_in_ver=img_p_in_ver.astype(np.uint8)
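
Note on the moved function: implent_law_head_main_not_parallel takes a 2-D label map (1 = main text, 2 = header, per the in-code comment), extracts the contours of each class, and wherever header and main-text regions share the same vertical band it re-paints the class with the smaller summed contour area to the dominant class's label. The sketch below shows how it might be exercised on a synthetic label map; it is illustrative only and not part of this commit. The import path is an assumption, since the diff does not name the module that receives the function, and the two-value cv2.findContours unpacking in the function body implies OpenCV 4.

# Minimal usage sketch -- illustrative only, not part of this commit.
# The import path below is an assumption; adjust it to wherever the
# function actually lives after this commit.
import numpy as np
from eynollah.utils import implent_law_head_main_not_parallel  # hypothetical path

# Synthetic label map: 0 = background, 1 = main text, 2 = header.
regions = np.zeros((200, 300), dtype=np.uint8)
regions[40:160, 20:180] = 1   # large main-text region
regions[60:90, 200:280] = 2   # small header sharing the same vertical band

cleaned = implent_law_head_main_not_parallel(regions.copy())

# The header's summed contour area is much smaller than the main text's
# within the overlapping vertical span, so the header contour is expected
# to be re-painted with label 1.
print(np.unique(cleaned))  # expected: [0 1]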