mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-05-01 03:32:00 +02:00
simplify, add confidence for headings as well
This commit is contained in:
parent
264b00f8ab
commit
a2f43b8d69
2 changed files with 24 additions and 26 deletions
|
|
@ -1712,19 +1712,20 @@ class Eynollah:
|
||||||
#print(time.time()-t_0_box,'time box in 3')
|
#print(time.time()-t_0_box,'time box in 3')
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
|
boxes, _ = return_boxes_of_images_by_order_of_reading_new(
|
||||||
splitter_y_new, regions_without_separators,
|
splitter_y_new, regions_without_separators,
|
||||||
text_regions_p == label_seps_fl, matrix_of_seps_ch,
|
text_regions_p == label_seps_fl, matrix_of_seps_ch,
|
||||||
num_col_classifier, erosion_hurts, self.tables, self.right2left)
|
num_col_classifier, erosion_hurts, self.tables, self.right2left,
|
||||||
|
logger=self.logger)
|
||||||
boxes_d = None
|
boxes_d = None
|
||||||
self.logger.debug("len(boxes): %s", len(boxes))
|
self.logger.debug("len(boxes): %s", len(boxes))
|
||||||
#print(time.time()-t_0_box,'time box in 3.1')
|
#print(time.time()-t_0_box,'time box in 3.1')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
|
boxes_d, _ = return_boxes_of_images_by_order_of_reading_new(
|
||||||
splitter_y_new_d, regions_without_separators_d,
|
splitter_y_new_d, regions_without_separators_d,
|
||||||
text_regions_p_d == label_seps_fl, matrix_of_seps_ch_d,
|
text_regions_p_d == label_seps_fl, matrix_of_seps_ch_d,
|
||||||
num_col_classifier, erosion_hurts, self.tables, self.right2left)
|
num_col_classifier, erosion_hurts, self.tables, self.right2left,
|
||||||
|
logger=self.logger)
|
||||||
boxes = None
|
boxes = None
|
||||||
self.logger.debug("len(boxes): %s", len(boxes_d))
|
self.logger.debug("len(boxes): %s", len(boxes_d))
|
||||||
|
|
||||||
|
|
@ -2843,20 +2844,14 @@ class Eynollah:
|
||||||
|
|
||||||
if not self.reading_order_machine_based:
|
if not self.reading_order_machine_based:
|
||||||
label_seps = 6
|
label_seps = 6
|
||||||
if not self.headers_off:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
_, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
|
||||||
num_col, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
|
text_regions_p, num_col_classifier, self.tables, label_seps,
|
||||||
text_regions_p, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h)
|
contours_h=None if self.headers_off else contours_only_text_parent_h)
|
||||||
else:
|
else:
|
||||||
_, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
|
_, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
|
||||||
text_regions_p_d, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered)
|
text_regions_p_d, num_col_classifier, self.tables, label_seps,
|
||||||
elif self.headers_off:
|
contours_h=None if self.headers_off else contours_only_text_parent_h_d_ordered)
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
|
||||||
num_col, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
|
|
||||||
text_regions_p, num_col_classifier, self.tables, label_seps)
|
|
||||||
else:
|
|
||||||
_, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
|
|
||||||
text_regions_p_d, num_col_classifier, self.tables, label_seps)
|
|
||||||
|
|
||||||
if not erosion_hurts:
|
if not erosion_hurts:
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
|
|
|
||||||
|
|
@ -899,16 +899,19 @@ def split_textregion_main_vs_head(
|
||||||
h_o = regions_model_1.shape[0]
|
h_o = regions_model_1.shape[0]
|
||||||
w_o = regions_model_1.shape[1]
|
w_o = regions_model_1.shape[1]
|
||||||
zoom = 3
|
zoom = 3
|
||||||
regions_model_1 = cv2.resize(regions_model_1, (regions_model_1.shape[1] // zoom,
|
regions_model_1 = cv2.resize(regions_model_1,
|
||||||
regions_model_1.shape[0] // zoom),
|
(regions_model_1.shape[1] // zoom,
|
||||||
|
regions_model_1.shape[0] // zoom),
|
||||||
interpolation=cv2.INTER_NEAREST)
|
interpolation=cv2.INTER_NEAREST)
|
||||||
regions_model_full = cv2.resize(regions_model_full, (regions_model_full.shape[1] // zoom,
|
regions_model_full = cv2.resize(regions_model_full,
|
||||||
regions_model_full.shape[0] // zoom),
|
(regions_model_full.shape[1] // zoom,
|
||||||
|
regions_model_full.shape[0] // zoom),
|
||||||
interpolation=cv2.INTER_NEAREST)
|
interpolation=cv2.INTER_NEAREST)
|
||||||
contours_only_text_parent_z = [(cnt / zoom).astype(int) for cnt in contours_only_text_parent]
|
contours_only_text_parent_z = [contour // zoom
|
||||||
|
for contour in contours_only_text_parent]
|
||||||
|
|
||||||
###
|
###
|
||||||
cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = \
|
_, _, x_min_main, x_max_main, y_min_main, y_max_main, _ = \
|
||||||
find_new_features_of_contours(contours_only_text_parent_z)
|
find_new_features_of_contours(contours_only_text_parent_z)
|
||||||
|
|
||||||
length_con=x_max_main-x_min_main
|
length_con=x_max_main-x_min_main
|
||||||
|
|
@ -947,7 +950,7 @@ def split_textregion_main_vs_head(
|
||||||
|
|
||||||
regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label_head_final
|
regions_model_1[(regions_model_1 == label_text) & (parent > 0)] = label_head_final
|
||||||
contours_only_text_parent_head.append(contours_only_text_parent[ii])
|
contours_only_text_parent_head.append(contours_only_text_parent[ii])
|
||||||
conf_contours_head.append(None) # why not conf_contours[ii], too?
|
conf_contours_head.append(conf_contours[ii])
|
||||||
if len(contours_only_text_parent_d_ordered):
|
if len(contours_only_text_parent_d_ordered):
|
||||||
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
|
contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii])
|
||||||
all_box_coord_head.append(all_box_coord[ii])
|
all_box_coord_head.append(all_box_coord[ii])
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue