binarize/enhance/layout/ocr ls_imgs: use the same file name suffix filter for dir-in mode

This commit is contained in:
Robert Sachunsky 2025-09-25 00:11:23 +02:00
parent 96a0d22496
commit d6cdb69acb
4 changed files with 19 additions and 10 deletions

View file

@ -107,6 +107,7 @@ from .utils.drop_capitals import (
from .utils.marginals import get_marginals
from .utils.resize import resize_image
from .utils import (
is_image_filename,
boosting_headers_by_longshot_region_segmentation,
crop_image_inside_box,
find_num_col,
@ -4547,14 +4548,13 @@ class Eynollah:
self.logger.info("Enabled modes: " + ", ".join(enabled_modes))
if dir_in:
self.ls_imgs = os.listdir(dir_in)
self.ls_imgs = [ind_img for ind_img in self.ls_imgs if ind_img.endswith('.jpg') or ind_img.endswith('.jpeg') or ind_img.endswith('.png') or ind_img.endswith('.tif') or ind_img.endswith('.tiff') or ind_img.endswith('.JPG') or ind_img.endswith('.JPEG') or ind_img.endswith('.TIF') or ind_img.endswith('.TIFF') or ind_img.endswith('.PNG')]
ls_imgs = list(filter(is_image_filename, os.listdir(self.dir_in)))
elif image_filename:
self.ls_imgs = [image_filename]
ls_imgs = [image_filename]
else:
raise ValueError("run requires either a single image filename or a directory")
for img_filename in self.ls_imgs:
for img_filename in ls_imgs:
self.logger.info(img_filename)
t0 = time.time()
@ -5394,8 +5394,7 @@ class Eynollah_ocr:
def run(self, overwrite : bool = False):
if self.dir_in:
ls_imgs = os.listdir(self.dir_in)
ls_imgs = [ind_img for ind_img in ls_imgs if ind_img.endswith('.jpg') or ind_img.endswith('.jpeg') or ind_img.endswith('.png') or ind_img.endswith('.tif') or ind_img.endswith('.tiff') or ind_img.endswith('.JPG') or ind_img.endswith('.JPEG') or ind_img.endswith('.TIF') or ind_img.endswith('.TIFF') or ind_img.endswith('.PNG')]
ls_imgs = list(filter(is_image_filename, os.listdir(self.dir_in)))
else:
ls_imgs = [self.image_filename]

View file

@ -21,6 +21,7 @@ from tensorflow.keras.models import load_model
from .utils.resize import resize_image
from .utils.pil_cv2 import pil2cv
from .utils import (
is_image_filename,
crop_image_inside_box
)
@ -701,13 +702,13 @@ class Enhancer:
t0_tot = time.time()
if dir_in:
self.ls_imgs = os.listdir(dir_in)
ls_imgs = list(filter(is_image_filename, os.listdir(dir_in)))
elif image_filename:
self.ls_imgs = [image_filename]
ls_imgs = [image_filename]
else:
raise ValueError("run requires either a single image filename or a directory")
for img_filename in self.ls_imgs:
for img_filename in ls_imgs:
self.logger.info(img_filename)
t0 = time.time()

View file

@ -16,6 +16,7 @@ import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.python.keras import backend as tensorflow_backend
from .utils import is_image_filename
def resize_image(img_in, input_height, input_width):
    """Return *img_in* resized with nearest-neighbour interpolation.

    The target size is handed to ``cv2.resize`` as ``(input_width,
    input_height)``, i.e. width first, although this function takes the
    height before the width.
    """
    target_size = (input_width, input_height)
    return cv2.resize(img_in, target_size, interpolation=cv2.INTER_NEAREST)
@ -347,7 +348,7 @@ class SbbBinarizer:
cv2.imwrite(output, img_last)
return img_last
else:
ls_imgs = os.listdir(dir_in)
ls_imgs = list(filter(is_image_filename, os.listdir(dir_in)))
for image_name in ls_imgs:
image_stem = image_name.split('.')[0]
print(image_name,'image_name')

View file

@ -2194,3 +2194,11 @@ def return_boxes_of_images_by_order_of_reading_new(
return boxes, peaks_neg_tot_tables_new
else:
return boxes, peaks_neg_tot_tables
def is_image_filename(fname: "str | os.PathLike[str]") -> bool:
    """Tell whether *fname* ends in one of the recognised image suffixes.

    The comparison is case-insensitive, so ``scan.TIF`` and ``scan.tif``
    are treated alike. Accepted suffixes are ``.jpg``, ``.jpeg``, ``.png``,
    ``.tif`` and ``.tiff``.

    Args:
        fname: File name or path, either a plain string or a path-like
            object such as ``pathlib.Path``.

    Returns:
        True if the name ends in an accepted suffix, False otherwise.
    """
    # Function-scope import keeps this helper usable on its own.
    import os

    image_suffixes = ('.jpg', '.jpeg', '.png', '.tif', '.tiff')
    # os.fspath() hands back a str unchanged and unwraps path-like objects,
    # so callers may pass os.listdir() entries and pathlib paths alike.
    return os.fspath(fname).lower().endswith(image_suffixes)