diff --git a/sbb_newspapers_org_image/eynollah.py b/sbb_newspapers_org_image/eynollah.py index 66eb8d8..a118d3b 100644 --- a/sbb_newspapers_org_image/eynollah.py +++ b/sbb_newspapers_org_image/eynollah.py @@ -12,8 +12,9 @@ import time import warnings from pathlib import Path from multiprocessing import Process, Queue, cpu_count -from ocrd_utils import getLogger +from lxml import etree as ET +from ocrd_utils import getLogger import cv2 import numpy as np @@ -27,14 +28,6 @@ import tensorflow as tf tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") -from scipy.signal import find_peaks -from scipy.ndimage import gaussian_filter1d -from shapely import geometry -from lxml import etree as ET -from matplotlib import pyplot, transforms -import matplotlib.patches as mpatches -import imutils - from .utils.contour import ( contours_in_same_horizon, filter_contours_area_of_image_interiors, @@ -115,7 +108,7 @@ from .utils import ( ) from .utils.xml import create_page_xml - +from .utils.pil_cv2 import check_dpi from .plot import EynollahPlotter SLOPE_THRESHOLD = 0.13 @@ -275,11 +268,6 @@ class eynollah: return prediction_true - def check_dpi(self): - self.logger.debug("enter check_dpi") - dpi = os.popen('identify -format "%x " ' + self.image_filename).read() - return int(float(dpi)) - def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred): self.logger.debug("enter calculate_width_height_by_columns") if num_col == 1 and width_early < 1100: @@ -389,7 +377,7 @@ class eynollah: def resize_and_enhance_image_with_column_classifier(self): self.logger.debug("enter resize_and_enhance_image_with_column_classifier") - dpi = self.check_dpi() + dpi = check_dpi(self.image_filename) self.logger.info("Detected %s DPI" % dpi) img = cv2.imread(self.image_filename) diff --git a/sbb_newspapers_org_image/utils/pil_cv2.py b/sbb_newspapers_org_image/utils/pil_cv2.py new file mode 100644 index 0000000..d7cd18d --- /dev/null +++ 
b/sbb_newspapers_org_image/utils/pil_cv2.py
@@ -0,0 +1,37 @@
+from PIL import Image
+import numpy as np
+from ocrd_models import OcrdExif
+from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor, imread
+
+# from sbb_binarization
+
+def cv2pil(img):
+    """Convert an image array to a PIL image after casting it to uint8."""
+    return Image.fromarray(img.astype('uint8'))
+
+def pil2cv(img):
+    """Convert a PIL image to a BGR array via cv2.cvtColor.
+
+    Modes '1' and 'L' are converted with COLOR_GRAY2BGR, every other mode
+    with COLOR_RGB2BGR. Mode '1' data is cast to uint8 first.
+    """
+    # from ocrd/workspace.py
+    color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR
+    pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img)
+    return cvtColor(pil_as_np_array, color_conversion)
+
+def check_dpi(image_filename):
+    """Return the resolution of the image file in dots per inch, as an int.
+
+    The resolution is read through OcrdExif. When OcrdExif reports the unit
+    as 'cm', the value is converted to a per-inch value (1 inch = 2.54 cm).
+    The result is rounded to the nearest integer.
+    """
+    # The context manager closes the file handle that Image.open keeps open.
+    with Image.open(image_filename) as img:
+        exif = OcrdExif(img)
+    resolution = exif.resolution
+    if exif.resolutionUnit == 'cm':
+        # Pixels per cm to pixels per inch: multiply, do not divide.
+        resolution *= 2.54
+    # Round instead of truncating: 118.11 px/cm * 2.54 is 299.9994, i.e. 300 dpi.
+    return int(round(resolution))
+
diff --git a/tests/test_dpi.py b/tests/test_dpi.py
new file mode 100644
index 0000000..59c5df4
--- /dev/null
+++ b/tests/test_dpi.py
@@ -0,0 +1,10 @@
+from pathlib import Path
+from sbb_newspapers_org_image.utils.pil_cv2 import check_dpi
+from tests.base import main
+
+def test_dpi():
+    fpath = Path(__file__).parent.joinpath('resources', 'kant_aufklaerung_1784_0020.tif')
+    assert 300 == check_dpi(str(fpath))
+
+if __name__ == '__main__':
+    main(__file__)