From 9967510327d33a49aa619ceba7a36f414fdc09e7 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Sep 2025 00:52:16 +0200 Subject: [PATCH] mbreorder: filter by .xml suffix in dir-in mode --- src/eynollah/mb_ro_on_layout.py | 7 ++++--- src/eynollah/utils/__init__.py | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index 70f1402..6d72614 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -25,6 +25,7 @@ from .utils.contour import ( return_contours_of_image, return_parent_contours, ) +from .utils import is_xml_filename DPI_THRESHOLD = 298 KERNEL = np.ones((5, 5), np.uint8) @@ -751,13 +752,13 @@ class machine_based_reading_order_on_layout: t0_tot = time.time() if dir_in: - self.ls_xmls = os.listdir(dir_in) + ls_xmls = list(filter(is_xml_filename, os.listdir(dir_in))) elif xml_filename: - self.ls_xmls = [xml_filename] + ls_xmls = [xml_filename] else: raise ValueError("run requires either a single image filename or a directory") - for xml_filename in self.ls_xmls: + for xml_filename in ls_xmls: self.logger.info(xml_filename) t0 = time.time() diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index c154fe4..6eeabd0 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -2202,3 +2202,6 @@ def is_image_filename(fname: str) -> bool: '.tif', '.tiff', )) + +def is_xml_filename(fname: str) -> bool: + return fname.lower().endswith('.xml')