From acda9c84eecca75e5260b2172923f59e86838a73 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 28 Jan 2026 13:28:03 +0100 Subject: [PATCH] =?UTF-8?q?training.gt=5Fgen=5Futils:=20improve=20XML?= =?UTF-8?q?=E2=86=92img=20path=20mapping=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when matching files in `dir_images` by XML path name stem, * use `dict` instead of `list` to assign reliably * filter out `.xml` files (so input directories can be mixed) * show informative warnings for files which cannot be matched --- src/eynollah/training/gt_gen_utils.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/eynollah/training/gt_gen_utils.py b/src/eynollah/training/gt_gen_utils.py index b7c35ee..f4defdd 100644 --- a/src/eynollah/training/gt_gen_utils.py +++ b/src/eynollah/training/gt_gen_utils.py @@ -627,7 +627,10 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ if dir_images: ls_org_imgs = os.listdir(dir_images) - ls_org_imgs_stem = [os.path.splitext(item)[0] for item in ls_org_imgs] + ls_org_imgs = {os.path.splitext(item)[0]: item + for item in ls_org_imgs + if not item.endswith('.xml')} + for index in tqdm(range(len(gt_list))): #try: print(gt_list[index]) @@ -802,7 +805,13 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ cv2.imwrite(os.path.join(output_dir, xml_file_stem + '.png'), img_poly) if dir_images: - org_image_name = ls_org_imgs[ls_org_imgs_stem.index(xml_file_stem)] + org_image_name = ls_org_imgs[xml_file_stem] + if not org_image_name: + print("image file for XML stem", xml_file_stem, "is missing") + continue + if not os.path.isfile(os.path.join(dir_images, org_image_name)): + print("image file for XML stem", xml_file_stem, "is not readable") + continue img_org = cv2.imread(os.path.join(dir_images, org_image_name)) if printspace and config_params['use_case']!='printspace': @@ -1266,7 +1275,13 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ if dir_images: - org_image_name = ls_org_imgs[ls_org_imgs_stem.index(xml_file_stem)] + org_image_name = ls_org_imgs[xml_file_stem] + if not org_image_name: + print("image file for XML stem", xml_file_stem, "is missing") + continue + if not os.path.isfile(os.path.join(dir_images, org_image_name)): + print("image file for XML stem", xml_file_stem, "is not readable") + continue img_org = cv2.imread(os.path.join(dir_images, org_image_name)) if printspace: