mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 11:49:55 +02:00
remove unused delete_seperator_around
This commit is contained in:
parent
133982380f
commit
c80fddb3b8
3 changed files with 24 additions and 28 deletions
|
@ -1,6 +1,6 @@
|
||||||
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring
|
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring
|
||||||
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines
|
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements
|
||||||
# pylint: disable=too-many-public-methods
|
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes
|
||||||
"""
|
"""
|
||||||
tool to extract table form data from alto xml data
|
tool to extract table form data from alto xml data
|
||||||
"""
|
"""
|
||||||
|
@ -67,8 +67,6 @@ from .utils import (
|
||||||
crop_image_inside_box,
|
crop_image_inside_box,
|
||||||
find_num_col,
|
find_num_col,
|
||||||
otsu_copy_binary,
|
otsu_copy_binary,
|
||||||
delete_seperator_around,
|
|
||||||
return_regions_without_seperators,
|
|
||||||
put_drop_out_from_only_drop_model,
|
put_drop_out_from_only_drop_model,
|
||||||
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
|
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
|
||||||
check_any_text_region_in_model_one_is_main_or_header,
|
check_any_text_region_in_model_one_is_main_or_header,
|
||||||
|
@ -107,7 +105,7 @@ class eynollah:
|
||||||
allow_scaling=False,
|
allow_scaling=False,
|
||||||
headers_off=False
|
headers_off=False
|
||||||
):
|
):
|
||||||
self.image_filename = image_filename # XXX This does not seem to be a directory as the name suggests, but a file
|
self.image_filename = image_filename
|
||||||
self.cont_page = []
|
self.cont_page = []
|
||||||
self.dir_out = dir_out
|
self.dir_out = dir_out
|
||||||
self.image_filename_stem = image_filename_stem
|
self.image_filename_stem = image_filename_stem
|
||||||
|
@ -137,7 +135,7 @@ class eynollah:
|
||||||
self.model_region_dir_fully = dir_models + "/model_3up_new_good_no_augmentation.h5"
|
self.model_region_dir_fully = dir_models + "/model_3up_new_good_no_augmentation.h5"
|
||||||
self.model_page_dir = dir_models + "/model_page_mixed_best.h5"
|
self.model_page_dir = dir_models + "/model_page_mixed_best.h5"
|
||||||
self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5"
|
self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5"
|
||||||
self.model_textline_dir = dir_models + "/model_textline_newspapers.h5"
|
self.model_textline_dir = dir_models + "/model_textline_newspapers.h5"
|
||||||
|
|
||||||
self._imgs = {}
|
self._imgs = {}
|
||||||
|
|
||||||
|
|
|
@ -3344,4 +3344,24 @@ def implent_law_head_main_not_parallel(text_regions):
|
||||||
|
|
||||||
return text_regions
|
return text_regions
|
||||||
|
|
||||||
|
def delete_seperator_around(spliter_y, peaks_neg, image_by_region):
    """Clear label values 6 and 7 in a narrow band around each inner column peak.

    The image is processed in horizontal strips: strip ``i`` covers rows
    ``spliter_y[i]`` up to (not including) ``spliter_y[i + 1]``. Within a
    strip, every entry of ``peaks_neg[i]`` except the first and the last
    defines a vertical band ``[peak - peak // 20, peak + peak // 20)``.
    Pixels inside that band carrying label 6 or 7 (presumably the separator
    classes, going by the function name -- confirm against the label map)
    are set to 0.

    For a 3-D image only channel 0 is written; a pixel is cleared when any
    of its first three channels holds 6 or 7. For any other rank the
    matching pixels themselves are cleared.

    Args:
        spliter_y: Sequence of row positions delimiting the strips. Values
            are truncated with ``int()``, so floats are accepted.
        peaks_neg: Per-strip sequences of column positions; needs at least
            ``len(spliter_y) - 1`` entries.
        image_by_region: NumPy label image, modified in place.

    Returns:
        The same ``image_by_region`` object that was passed in.
    """
    is_multichannel = len(image_by_region.shape) == 3

    for i in range(len(spliter_y) - 1):
        # Cast once per strip so every write below uses the same row bounds,
        # whether the caller passes ints or floats.
        row_start = int(spliter_y[i])
        row_stop = int(spliter_y[i + 1])

        # The first and last peaks are skipped, only inner peaks get a band.
        for j in range(1, len(peaks_neg[i]) - 1):
            peak = int(peaks_neg[i][j])
            margin = int(1.0 / 20.0 * peak)  # half-width: 5 % of the column position

            # Basic slicing yields a view, so writes land in image_by_region.
            band = image_by_region[row_start:row_stop, peak - margin : peak + margin]

            if is_multichannel:
                channels = band[:, :, :3]
                hit = ((channels == 6) | (channels == 7)).any(axis=2)
                band[:, :, 0][hit] = 0
            else:
                band[(band == 6) | (band == 7)] = 0

    return image_by_region
|
||||||
|
|
||||||
|
|
|
@ -718,28 +718,6 @@ def find_num_col_by_vertical_lines(regions_without_seperators, multiplier=3.8):
|
||||||
# print(peaks,'peaksnew')
|
# print(peaks,'peaksnew')
|
||||||
return peaks
|
return peaks
|
||||||
|
|
||||||
|
|
||||||
def delete_seperator_around(spliter_y, peaks_neg, image_by_region):
    """Clear label values 6 and 7 in a narrow band around each inner column peak.

    The image is processed in horizontal strips: strip ``i`` covers rows
    ``spliter_y[i]`` up to (not including) ``spliter_y[i + 1]``. Within a
    strip, every entry of ``peaks_neg[i]`` except the first and the last
    defines a vertical band ``[peak - peak // 20, peak + peak // 20)``.
    Pixels inside that band carrying label 6 or 7 (presumably the separator
    classes, going by the function name -- confirm against the label map)
    are set to 0.

    For a 3-D image only channel 0 is written; a pixel is cleared when any
    of its first three channels holds 6 or 7. For any other rank the
    matching pixels themselves are cleared.

    Args:
        spliter_y: Sequence of row positions delimiting the strips. Values
            are truncated with ``int()``, so floats are accepted.
        peaks_neg: Per-strip sequences of column positions; needs at least
            ``len(spliter_y) - 1`` entries.
        image_by_region: NumPy label image, modified in place.

    Returns:
        The same ``image_by_region`` object that was passed in.
    """
    is_multichannel = len(image_by_region.shape) == 3

    for i in range(len(spliter_y) - 1):
        # Cast once per strip so every write below uses the same row bounds,
        # whether the caller passes ints or floats.
        row_start = int(spliter_y[i])
        row_stop = int(spliter_y[i + 1])

        # The first and last peaks are skipped, only inner peaks get a band.
        for j in range(1, len(peaks_neg[i]) - 1):
            peak = int(peaks_neg[i][j])
            margin = int(1.0 / 20.0 * peak)  # half-width: 5 % of the column position

            # Basic slicing yields a view, so writes land in image_by_region.
            band = image_by_region[row_start:row_stop, peak - margin : peak + margin]

            if is_multichannel:
                channels = band[:, :, :3]
                hit = ((channels == 6) | (channels == 7)).any(axis=2)
                band[:, :, 0][hit] = 0
            else:
                band[(band == 6) | (band == 7)] = 0

    return image_by_region
|
|
||||||
|
|
||||||
def return_regions_without_seperators(regions_pre):
|
def return_regions_without_seperators(regions_pre):
|
||||||
kernel = np.ones((5, 5), np.uint8)
|
kernel = np.ones((5, 5), np.uint8)
|
||||||
regions_without_seperators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1
|
regions_without_seperators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue