mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 11:49:55 +02:00
more modifications for tables
This commit is contained in:
parent
9f64110513
commit
254abf4d3d
2 changed files with 92 additions and 57 deletions
|
@ -1174,7 +1174,7 @@ class Eynollah:
|
||||||
try:
|
try:
|
||||||
img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20)
|
img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20)
|
||||||
|
|
||||||
_, _ = find_num_col(img_only_regions, multiplier=6.0)
|
_, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
|
||||||
|
|
||||||
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1)))
|
img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1)))
|
||||||
|
|
||||||
|
@ -1976,7 +1976,7 @@ class Eynollah:
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
num_col, _ = find_num_col(img_only_regions, multiplier=6.0)
|
num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
|
||||||
num_col = num_col + 1
|
num_col = num_col + 1
|
||||||
if not num_column_is_classified:
|
if not num_column_is_classified:
|
||||||
num_col_classifier = num_col + 1
|
num_col_classifier = num_col + 1
|
||||||
|
@ -2071,10 +2071,10 @@ class Eynollah:
|
||||||
regions_without_separators_d = None
|
regions_without_separators_d = None
|
||||||
pixel_lines = 3
|
pixel_lines = 3
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
_, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines)
|
_, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
|
||||||
|
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||||
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines)
|
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
|
||||||
K.clear_session()
|
K.clear_session()
|
||||||
|
|
||||||
self.logger.info("num_col_classifier: %s", num_col_classifier)
|
self.logger.info("num_col_classifier: %s", num_col_classifier)
|
||||||
|
@ -2088,7 +2088,7 @@ class Eynollah:
|
||||||
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts)
|
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables)
|
||||||
boxes_d = None
|
boxes_d = None
|
||||||
self.logger.debug("len(boxes): %s", len(boxes))
|
self.logger.debug("len(boxes): %s", len(boxes))
|
||||||
|
|
||||||
|
@ -2098,7 +2098,7 @@ class Eynollah:
|
||||||
img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line)
|
img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line)
|
||||||
img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier)
|
img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier)
|
||||||
else:
|
else:
|
||||||
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts)
|
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables)
|
||||||
boxes = None
|
boxes = None
|
||||||
self.logger.debug("len(boxes): %s", len(boxes_d))
|
self.logger.debug("len(boxes): %s", len(boxes_d))
|
||||||
|
|
||||||
|
@ -2156,34 +2156,34 @@ class Eynollah:
|
||||||
textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1])
|
textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1])
|
||||||
table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1])
|
table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1])
|
||||||
|
|
||||||
regions_without_seperators_d=(text_regions_p_1_n[:,:] == 1)*1
|
regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1
|
||||||
regions_without_seperators_d[table_prediction_n[:,:] == 1] = 1
|
regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
|
||||||
|
|
||||||
regions_without_seperators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions)
|
regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions)
|
||||||
regions_without_seperators[table_prediction == 1] = 1
|
regions_without_separators[table_prediction == 1] = 1
|
||||||
|
|
||||||
pixel_lines=3
|
pixel_lines=3
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines)
|
num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
|
||||||
|
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||||
num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier,pixel_lines)
|
num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier, self.tables, pixel_lines)
|
||||||
K.clear_session()
|
K.clear_session()
|
||||||
gc.collect()
|
gc.collect()
|
||||||
|
|
||||||
if num_col_classifier>=3:
|
if num_col_classifier>=3:
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
regions_without_seperators = regions_without_seperators.astype(np.uint8)
|
regions_without_separators = regions_without_separators.astype(np.uint8)
|
||||||
regions_without_seperators = cv2.erode(regions_without_seperators[:,:], KERNEL, iterations=6)
|
regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6)
|
||||||
|
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||||
regions_without_seperators_d = regions_without_seperators_d.astype(np.uint8)
|
regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
|
||||||
regions_without_seperators_d = cv2.erode(regions_without_seperators_d[:,:], KERNEL, iterations=6)
|
regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6)
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_seperators, matrix_of_lines_ch, num_col_classifier, erosion_hurts)
|
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables)
|
||||||
text_regions_p_tables = np.copy(text_regions_p)
|
text_regions_p_tables = np.copy(text_regions_p)
|
||||||
text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10
|
text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10
|
||||||
pixel_line = 3
|
pixel_line = 3
|
||||||
|
@ -2192,7 +2192,7 @@ class Eynollah:
|
||||||
img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier)
|
img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_seperators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts)
|
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables)
|
||||||
text_regions_p_tables = np.copy(text_regions_p_1_n)
|
text_regions_p_tables = np.copy(text_regions_p_1_n)
|
||||||
text_regions_p_tables = np.round(text_regions_p_tables)
|
text_regions_p_tables = np.round(text_regions_p_tables)
|
||||||
text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10
|
text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10
|
||||||
|
@ -2271,20 +2271,20 @@ class Eynollah:
|
||||||
text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
|
text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
|
||||||
#plt.imshow(text_regions_p)
|
#plt.imshow(text_regions_p)
|
||||||
#plt.show()
|
#plt.show()
|
||||||
|
if not self.tables:
|
||||||
|
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||||
|
_, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew)
|
||||||
|
|
||||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
|
||||||
_, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew)
|
textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
|
||||||
|
regions_fully_n = resize_image(regions_fully_n, text_regions_p.shape[0], text_regions_p.shape[1])
|
||||||
|
regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1
|
||||||
|
else:
|
||||||
|
text_regions_p_1_n = None
|
||||||
|
textline_mask_tot_d = None
|
||||||
|
regions_without_separators_d = None
|
||||||
|
|
||||||
text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
|
regions_without_separators = (text_regions_p[:, :] == 1) * 1
|
||||||
textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
|
|
||||||
regions_fully_n = resize_image(regions_fully_n, text_regions_p.shape[0], text_regions_p.shape[1])
|
|
||||||
regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1
|
|
||||||
else:
|
|
||||||
text_regions_p_1_n = None
|
|
||||||
textline_mask_tot_d = None
|
|
||||||
regions_without_separators_d = None
|
|
||||||
|
|
||||||
regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
|
|
||||||
|
|
||||||
K.clear_session()
|
K.clear_session()
|
||||||
img_revised_tab = np.copy(text_regions_p[:, :])
|
img_revised_tab = np.copy(text_regions_p[:, :])
|
||||||
|
@ -2327,6 +2327,8 @@ class Eynollah:
|
||||||
slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
|
slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
|
||||||
self.logger.info("deskewing took %ss", str(time.time() - t1))
|
self.logger.info("deskewing took %ss", str(time.time() - t1))
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
|
#plt.imshow(table_prediction)
|
||||||
|
#plt.show()
|
||||||
|
|
||||||
textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
|
textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
|
||||||
self.logger.info("detection of marginals took %ss", str(time.time() - t1))
|
self.logger.info("detection of marginals took %ss", str(time.time() - t1))
|
||||||
|
@ -2482,14 +2484,14 @@ class Eynollah:
|
||||||
|
|
||||||
if not self.headers_off:
|
if not self.headers_off:
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines, contours_only_text_parent_h)
|
num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h)
|
||||||
else:
|
else:
|
||||||
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines, contours_only_text_parent_h_d_ordered)
|
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered)
|
||||||
elif self.headers_off:
|
elif self.headers_off:
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines)
|
num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
|
||||||
else:
|
else:
|
||||||
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, pixel_lines)
|
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines)
|
||||||
|
|
||||||
# print(peaks_neg_fin,peaks_neg_fin_d,'num_col2')
|
# print(peaks_neg_fin,peaks_neg_fin_d,'num_col2')
|
||||||
# print(splitter_y_new,splitter_y_new_d,'num_col_classifier')
|
# print(splitter_y_new,splitter_y_new_d,'num_col_classifier')
|
||||||
|
@ -2499,22 +2501,42 @@ class Eynollah:
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
regions_without_separators = regions_without_separators.astype(np.uint8)
|
regions_without_separators = regions_without_separators.astype(np.uint8)
|
||||||
regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
|
regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
|
||||||
random_pixels_for_image = np.random.randn(regions_without_separators.shape[0], regions_without_separators.shape[1])
|
|
||||||
random_pixels_for_image[random_pixels_for_image < -0.5] = 0
|
#regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0)
|
||||||
random_pixels_for_image[random_pixels_for_image != 0] = 1
|
#meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1]
|
||||||
regions_without_separators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 5)] = 1
|
#first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0)
|
||||||
|
#last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0)
|
||||||
|
#last_nonzero = len(regions_without_separators_0) - last_nonzero
|
||||||
|
|
||||||
|
#random_pixels_for_image = np.random.randn(regions_without_separators.shape[0], regions_without_separators.shape[1])
|
||||||
|
#random_pixels_for_image[random_pixels_for_image < -0.5] = 0
|
||||||
|
#random_pixels_for_image[random_pixels_for_image != 0] = 1
|
||||||
|
#regions_without_separators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 5)] = 1
|
||||||
|
|
||||||
|
#regions_without_separators[:, 0:first_nonzero] = 0
|
||||||
|
#regions_without_separators[:, last_nonzero:] = 0
|
||||||
else:
|
else:
|
||||||
regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
|
regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
|
||||||
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
||||||
random_pixels_for_image = np.random.randn(regions_without_separators_d.shape[0], regions_without_separators_d.shape[1])
|
|
||||||
random_pixels_for_image[random_pixels_for_image < -0.5] = 0
|
#regions_without_separators_0 = regions_without_separators_d[:, :].sum(axis=0)
|
||||||
random_pixels_for_image[random_pixels_for_image != 0] = 1
|
#meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1]
|
||||||
regions_without_separators_d[(random_pixels_for_image[:, :] == 1) & (text_regions_p_1_n[:, :] == 5)] = 1
|
#first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0)
|
||||||
|
#last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0)
|
||||||
|
#last_nonzero = len(regions_without_separators_0) - last_nonzero
|
||||||
|
|
||||||
|
#random_pixels_for_image = np.random.randn(regions_without_separators_d.shape[0], regions_without_separators_d.shape[1])
|
||||||
|
#random_pixels_for_image[random_pixels_for_image < -0.5] = 0
|
||||||
|
#random_pixels_for_image[random_pixels_for_image != 0] = 1
|
||||||
|
##regions_without_separators_d[(random_pixels_for_image[:, :] == 1) & (text_regions_p_1_n[:, :] == 5)] = 1
|
||||||
|
|
||||||
|
#regions_without_separators_d[:, 0:first_nonzero] = 0
|
||||||
|
#regions_without_separators_d[:, last_nonzero:] = 0
|
||||||
|
|
||||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||||
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts)
|
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables)
|
||||||
else:
|
else:
|
||||||
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts)
|
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables)
|
||||||
|
|
||||||
if self.plotter:
|
if self.plotter:
|
||||||
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
||||||
|
|
|
@ -360,7 +360,7 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
|
||||||
return np.std(z)
|
return np.std(z)
|
||||||
|
|
||||||
|
|
||||||
def find_num_col(regions_without_separators, multiplier=3.8):
|
def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
|
||||||
regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0)
|
regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0)
|
||||||
##plt.plot(regions_without_separators_0)
|
##plt.plot(regions_without_separators_0)
|
||||||
##plt.show()
|
##plt.show()
|
||||||
|
@ -416,6 +416,19 @@ def find_num_col(regions_without_separators, multiplier=3.8):
|
||||||
interest_neg_fin = interest_neg[(interest_neg < grenze)]
|
interest_neg_fin = interest_neg[(interest_neg < grenze)]
|
||||||
peaks_neg_fin = peaks_neg[(interest_neg < grenze)]
|
peaks_neg_fin = peaks_neg[(interest_neg < grenze)]
|
||||||
# interest_neg_fin=interest_neg[(interest_neg<grenze)]
|
# interest_neg_fin=interest_neg[(interest_neg<grenze)]
|
||||||
|
|
||||||
|
if not tables:
|
||||||
|
if ( num_col_classifier - ( (len(interest_neg_fin))+1 ) ) >= 3:
|
||||||
|
index_sort_interest_neg_fin= np.argsort(interest_neg_fin)
|
||||||
|
peaks_neg_sorted = np.array(peaks_neg)[index_sort_interest_neg_fin]
|
||||||
|
interest_neg_fin_sorted = np.array(interest_neg_fin)[index_sort_interest_neg_fin]
|
||||||
|
|
||||||
|
if len(index_sort_interest_neg_fin)>=num_col_classifier:
|
||||||
|
peaks_neg_fin = list( peaks_neg_sorted[:num_col_classifier] )
|
||||||
|
interest_neg_fin = list( interest_neg_fin_sorted[:num_col_classifier] )
|
||||||
|
else:
|
||||||
|
peaks_neg_fin = peaks_neg[:]
|
||||||
|
interest_neg_fin = interest_neg[:]
|
||||||
|
|
||||||
num_col = (len(interest_neg_fin)) + 1
|
num_col = (len(interest_neg_fin)) + 1
|
||||||
|
|
||||||
|
@ -489,9 +502,9 @@ def find_num_col(regions_without_separators, multiplier=3.8):
|
||||||
num_col = 1
|
num_col = 1
|
||||||
peaks_neg_true = []
|
peaks_neg_true = []
|
||||||
|
|
||||||
diff_peaks_annormal = diff_peaks[diff_peaks < 360]
|
diff_peaks_abnormal = diff_peaks[diff_peaks < 360]
|
||||||
|
|
||||||
if len(diff_peaks_annormal) > 0:
|
if len(diff_peaks_abnormal) > 0:
|
||||||
arg_help = np.array(range(len(diff_peaks)))
|
arg_help = np.array(range(len(diff_peaks)))
|
||||||
arg_help_ann = arg_help[diff_peaks < 360]
|
arg_help_ann = arg_help[diff_peaks < 360]
|
||||||
|
|
||||||
|
@ -1248,7 +1261,7 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point):
|
||||||
peaks_neg_tot.append(last_point)
|
peaks_neg_tot.append(last_point)
|
||||||
return peaks_neg_tot
|
return peaks_neg_tot
|
||||||
|
|
||||||
def find_number_of_columns_in_document(region_pre_p, num_col_classifier, pixel_lines, contours_h=None):
|
def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None):
|
||||||
|
|
||||||
separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1
|
separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1
|
||||||
|
|
||||||
|
@ -1561,7 +1574,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, pixel_l
|
||||||
#regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3)
|
#regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3)
|
||||||
#
|
#
|
||||||
try:
|
try:
|
||||||
num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile,multiplier=7.0)
|
num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0)
|
||||||
except:
|
except:
|
||||||
num_col = 0
|
num_col = 0
|
||||||
peaks_neg_fin = []
|
peaks_neg_fin = []
|
||||||
|
@ -1583,7 +1596,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, pixel_l
|
||||||
return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n
|
return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n
|
||||||
|
|
||||||
|
|
||||||
def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts):
|
def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, tables):
|
||||||
boxes=[]
|
boxes=[]
|
||||||
peaks_neg_tot_tables = []
|
peaks_neg_tot_tables = []
|
||||||
|
|
||||||
|
@ -1599,20 +1612,21 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if erosion_hurts:
|
if erosion_hurts:
|
||||||
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=6.)
|
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], num_col_classifier, tables, multiplier=6.)
|
||||||
else:
|
else:
|
||||||
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.)
|
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],num_col_classifier, tables, multiplier=7.)
|
||||||
except:
|
except:
|
||||||
peaks_neg_fin=[]
|
peaks_neg_fin=[]
|
||||||
|
num_col = 0
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
peaks_neg_fin_org=np.copy(peaks_neg_fin)
|
peaks_neg_fin_org=np.copy(peaks_neg_fin)
|
||||||
if (len(peaks_neg_fin)+1)<num_col_classifier:
|
if (len(peaks_neg_fin)+1)<num_col_classifier or num_col_classifier==6:
|
||||||
#print('burda')
|
#print('burda')
|
||||||
|
|
||||||
if len(peaks_neg_fin)==0:
|
if len(peaks_neg_fin)==0:
|
||||||
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=3.)
|
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],num_col_classifier, tables, multiplier=3.)
|
||||||
peaks_neg_fin_early=[]
|
peaks_neg_fin_early=[]
|
||||||
peaks_neg_fin_early.append(0)
|
peaks_neg_fin_early.append(0)
|
||||||
#print(peaks_neg_fin,'peaks_neg_fin')
|
#print(peaks_neg_fin,'peaks_neg_fin')
|
||||||
|
@ -1628,12 +1642,12 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
#plt.plot(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]].sum(axis=0) )
|
#plt.plot(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]].sum(axis=0) )
|
||||||
#plt.show()
|
#plt.show()
|
||||||
try:
|
try:
|
||||||
num_col, peaks_neg_fin1=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],multiplier=7.)
|
num_col, peaks_neg_fin1=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],num_col_classifier,tables, multiplier=7.)
|
||||||
except:
|
except:
|
||||||
peaks_neg_fin1=[]
|
peaks_neg_fin1=[]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
num_col, peaks_neg_fin2=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],multiplier=5.)
|
num_col, peaks_neg_fin2=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),peaks_neg_fin_early[i_n]:peaks_neg_fin_early[i_n+1]],num_col_classifier,tables, multiplier=5.)
|
||||||
except:
|
except:
|
||||||
peaks_neg_fin2=[]
|
peaks_neg_fin2=[]
|
||||||
|
|
||||||
|
@ -2238,5 +2252,4 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
||||||
|
|
||||||
#else:
|
#else:
|
||||||
#boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]])
|
#boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]])
|
||||||
|
|
||||||
return boxes, peaks_neg_tot_tables
|
return boxes, peaks_neg_tot_tables
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue