Restore correct execution of export_textline_images_and_text

2026-03-13 02:31:56 +01:00 · 2025-12-03 15:40:52 +01:00 · 2025-12-03 15:40:52 +01:00 · 7bf5e077d9
commit 7bf5e077d9
parent 6ac37af2f8
1 changed file with 22 additions and 24 deletions
--- a/src/eynollah/training/extract_line_gt.py
+++ b/src/eynollah/training/extract_line_gt.py
@@ -20,7 +20,6 @@ from ..utils import is_image_filename
@click.option(
    "--dir_in",
    "-di",
-    'image_filename',
    help="directory of input images (instead of --image)",
    type=click.Path(exists=True, file_okay=False),
 )
@@ -52,21 +51,21 @@ from ..utils import is_image_filename
    help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
 )
 def linegt_cli(
-    image_filename,
+    image,
    dir_in,
    dir_xmls,
    dir_out,
    pref_of_dataset,
    do_not_mask_with_textline_contour,
 ):
-    assert bool(dir_in) ^ bool(image_filename), "Set --dir-in or --image-filename, not both"
+    assert bool(dir_in) ^ bool(image), "Set --dir-in or --image-filename, not both"
    if dir_in:
        ls_imgs = [
-            os.path.join(dir_in, image_filename) for image_filename in filter(is_image_filename, os.listdir(dir_in))
+            os.path.join(dir_in, image) for image in filter(is_image_filename, os.listdir(dir_in))
        ]
    else:
-        assert image_filename
-        ls_imgs = [image_filename]
+        assert image
+        ls_imgs = [image]

    for dir_img in ls_imgs:
        file_name = Path(dir_img).stem
@@ -83,7 +82,7 @@ def linegt_cli(
        name_space = alltags[0].split('}')[0]
        name_space = name_space.split('{')[1]

-        region_tags = [x for x in alltags if x.endswith('TextRegion')][0]
+        region_tags = np.unique([x for x in alltags if x.endswith('TextRegion')])

        cropped_lines_region_indexer = []

@@ -116,21 +115,20 @@ def linegt_cli(

                            if img_crop.shape[0] == 0 or img_crop.shape[1] == 0:
                                continue
+                        if child_textlines.tag.endswith("TextEquiv"):
+                            for cheild_text in child_textlines:
+                                if cheild_text.tag.endswith("Unicode"):
+                                    textline_text = cheild_text.text
+                                    if textline_text:
+                                        base_name = os.path.join(
+                                            dir_out, file_name + '_line_' + str(indexer_textlines)
+                                        )
+                                        if pref_of_dataset:
+                                            base_name += '_' + pref_of_dataset
+                                        if not do_not_mask_with_textline_contour:
+                                            base_name += '_masked'

-                            if child_textlines.tag.endswith("TextEquiv"):
-                                for cheild_text in child_textlines:
-                                    if cheild_text.tag.endswith("Unicode"):
-                                        textline_text = cheild_text.text
-                                        if textline_text:
-                                            base_name = os.path.join(
-                                                dir_out, file_name + '_line_' + str(indexer_textlines)
-                                            )
-                                            if pref_of_dataset:
-                                                base_name += '_' + pref_of_dataset
-                                            if not do_not_mask_with_textline_contour:
-                                                base_name += '_masked'
-
-                                            with open(base_name + '.txt', 'w') as text_file:
-                                                text_file.write(textline_text)
-                                            cv2.imwrite(base_name + '.png', img_crop)
-                                        indexer_textlines += 1
+                                        with open(base_name + '.txt', 'w') as text_file:
+                                            text_file.write(textline_text)
+                                        cv2.imwrite(base_name + '.png', img_crop)
+                                    indexer_textlines += 1