diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 27277ee..9071f7a 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -107,6 +107,7 @@ from .utils.drop_capitals import ( from .utils.marginals import get_marginals from .utils.resize import resize_image from .utils import ( + is_image_filename, boosting_headers_by_longshot_region_segmentation, crop_image_inside_box, find_num_col, @@ -4547,14 +4548,13 @@ class Eynollah: self.logger.info("Enabled modes: " + ", ".join(enabled_modes)) if dir_in: - self.ls_imgs = os.listdir(dir_in) - self.ls_imgs = [ind_img for ind_img in self.ls_imgs if ind_img.endswith('.jpg') or ind_img.endswith('.jpeg') or ind_img.endswith('.png') or ind_img.endswith('.tif') or ind_img.endswith('.tiff') or ind_img.endswith('.JPG') or ind_img.endswith('.JPEG') or ind_img.endswith('.TIF') or ind_img.endswith('.TIFF') or ind_img.endswith('.PNG')] + ls_imgs = list(filter(is_image_filename, os.listdir(self.dir_in))) elif image_filename: - self.ls_imgs = [image_filename] + ls_imgs = [image_filename] else: raise ValueError("run requires either a single image filename or a directory") - for img_filename in self.ls_imgs: + for img_filename in ls_imgs: self.logger.info(img_filename) t0 = time.time() @@ -5394,8 +5394,7 @@ class Eynollah_ocr: def run(self, overwrite : bool = False): if self.dir_in: - ls_imgs = os.listdir(self.dir_in) - ls_imgs = [ind_img for ind_img in ls_imgs if ind_img.endswith('.jpg') or ind_img.endswith('.jpeg') or ind_img.endswith('.png') or ind_img.endswith('.tif') or ind_img.endswith('.tiff') or ind_img.endswith('.JPG') or ind_img.endswith('.JPEG') or ind_img.endswith('.TIF') or ind_img.endswith('.TIFF') or ind_img.endswith('.PNG')] + ls_imgs = list(filter(is_image_filename, os.listdir(self.dir_in))) else: ls_imgs = [self.image_filename] diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py index f577e52..5a06d59 100644 --- a/src/eynollah/image_enhancer.py +++ b/src/eynollah/image_enhancer.py @@ -21,6 +21,7 @@ from tensorflow.keras.models import load_model from .utils.resize import resize_image from .utils.pil_cv2 import pil2cv from .utils import ( + is_image_filename, crop_image_inside_box ) @@ -701,13 +702,13 @@ class Enhancer: t0_tot = time.time() if dir_in: - self.ls_imgs = os.listdir(dir_in) + ls_imgs = list(filter(is_image_filename, os.listdir(dir_in))) elif image_filename: - self.ls_imgs = [image_filename] + ls_imgs = [image_filename] else: raise ValueError("run requires either a single image filename or a directory") - for img_filename in self.ls_imgs: + for img_filename in ls_imgs: self.logger.info(img_filename) t0 = time.time() diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py index 2d5035f..3716987 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/sbb_binarize.py @@ -16,6 +16,7 @@ import tensorflow as tf from tensorflow.keras.models import load_model from tensorflow.python.keras import backend as tensorflow_backend +from .utils import is_image_filename def resize_image(img_in, input_height, input_width): return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) @@ -347,7 +348,7 @@ class SbbBinarizer: cv2.imwrite(output, img_last) return img_last else: - ls_imgs = os.listdir(dir_in) + ls_imgs = list(filter(is_image_filename, os.listdir(dir_in))) for image_name in ls_imgs: image_stem = image_name.split('.')[0] print(image_name,'image_name') diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index ca86047..c154fe4 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -2194,3 +2194,11 @@ def return_boxes_of_images_by_order_of_reading_new( return boxes, peaks_neg_tot_tables_new else: return boxes, peaks_neg_tot_tables + +def is_image_filename(fname: str) -> bool: + return fname.lower().endswith(('.jpg', + '.jpeg', + '.png', + '.tif', + '.tiff', + ))