mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-12-08 04:04:16 +01:00
Restore correct execution of export_textline_images_and_text
This commit is contained in:
parent
6ac37af2f8
commit
7bf5e077d9
1 changed file with 22 additions and 24 deletions
|
|
@ -20,7 +20,6 @@ from ..utils import is_image_filename
|
|||
@click.option(
|
||||
"--dir_in",
|
||||
"-di",
|
||||
'image_filename',
|
||||
help="directory of input images (instead of --image)",
|
||||
type=click.Path(exists=True, file_okay=False),
|
||||
)
|
||||
|
|
@ -52,21 +51,21 @@ from ..utils import is_image_filename
|
|||
help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
|
||||
)
|
||||
def linegt_cli(
|
||||
image_filename,
|
||||
image,
|
||||
dir_in,
|
||||
dir_xmls,
|
||||
dir_out,
|
||||
pref_of_dataset,
|
||||
do_not_mask_with_textline_contour,
|
||||
):
|
||||
assert bool(dir_in) ^ bool(image_filename), "Set --dir-in or --image-filename, not both"
|
||||
assert bool(dir_in) ^ bool(image), "Set --dir-in or --image-filename, not both"
|
||||
if dir_in:
|
||||
ls_imgs = [
|
||||
os.path.join(dir_in, image_filename) for image_filename in filter(is_image_filename, os.listdir(dir_in))
|
||||
os.path.join(dir_in, image) for image in filter(is_image_filename, os.listdir(dir_in))
|
||||
]
|
||||
else:
|
||||
assert image_filename
|
||||
ls_imgs = [image_filename]
|
||||
assert image
|
||||
ls_imgs = [image]
|
||||
|
||||
for dir_img in ls_imgs:
|
||||
file_name = Path(dir_img).stem
|
||||
|
|
@ -83,7 +82,7 @@ def linegt_cli(
|
|||
name_space = alltags[0].split('}')[0]
|
||||
name_space = name_space.split('{')[1]
|
||||
|
||||
region_tags = [x for x in alltags if x.endswith('TextRegion')][0]
|
||||
region_tags = np.unique([x for x in alltags if x.endswith('TextRegion')])
|
||||
|
||||
cropped_lines_region_indexer = []
|
||||
|
||||
|
|
@ -116,21 +115,20 @@ def linegt_cli(
|
|||
|
||||
if img_crop.shape[0] == 0 or img_crop.shape[1] == 0:
|
||||
continue
|
||||
if child_textlines.tag.endswith("TextEquiv"):
|
||||
for cheild_text in child_textlines:
|
||||
if cheild_text.tag.endswith("Unicode"):
|
||||
textline_text = cheild_text.text
|
||||
if textline_text:
|
||||
base_name = os.path.join(
|
||||
dir_out, file_name + '_line_' + str(indexer_textlines)
|
||||
)
|
||||
if pref_of_dataset:
|
||||
base_name += '_' + pref_of_dataset
|
||||
if not do_not_mask_with_textline_contour:
|
||||
base_name += '_masked'
|
||||
|
||||
if child_textlines.tag.endswith("TextEquiv"):
|
||||
for cheild_text in child_textlines:
|
||||
if cheild_text.tag.endswith("Unicode"):
|
||||
textline_text = cheild_text.text
|
||||
if textline_text:
|
||||
base_name = os.path.join(
|
||||
dir_out, file_name + '_line_' + str(indexer_textlines)
|
||||
)
|
||||
if pref_of_dataset:
|
||||
base_name += '_' + pref_of_dataset
|
||||
if not do_not_mask_with_textline_contour:
|
||||
base_name += '_masked'
|
||||
|
||||
with open(base_name + '.txt', 'w') as text_file:
|
||||
text_file.write(textline_text)
|
||||
cv2.imwrite(base_name + '.png', img_crop)
|
||||
indexer_textlines += 1
|
||||
with open(base_name + '.txt', 'w') as text_file:
|
||||
text_file.write(textline_text)
|
||||
cv2.imwrite(base_name + '.png', img_crop)
|
||||
indexer_textlines += 1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue