check_dpi: use OcrdExif instead identify callout

pull/19/head
Konstantin Baierer 4 years ago
parent 0a92543245
commit 8c603ae16d

@ -12,8 +12,9 @@ import time
import warnings import warnings
from pathlib import Path from pathlib import Path
from multiprocessing import Process, Queue, cpu_count from multiprocessing import Process, Queue, cpu_count
from ocrd_utils import getLogger
from lxml import etree as ET
from ocrd_utils import getLogger
import cv2 import cv2
import numpy as np import numpy as np
@ -27,14 +28,6 @@ import tensorflow as tf
tf.get_logger().setLevel("ERROR") tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from shapely import geometry
from lxml import etree as ET
from matplotlib import pyplot, transforms
import matplotlib.patches as mpatches
import imutils
from .utils.contour import ( from .utils.contour import (
contours_in_same_horizon, contours_in_same_horizon,
filter_contours_area_of_image_interiors, filter_contours_area_of_image_interiors,
@ -115,7 +108,7 @@ from .utils import (
) )
from .utils.xml import create_page_xml from .utils.xml import create_page_xml
from .utils.pil_cv2 import check_dpi
from .plot import EynollahPlotter from .plot import EynollahPlotter
SLOPE_THRESHOLD = 0.13 SLOPE_THRESHOLD = 0.13
@ -275,11 +268,6 @@ class eynollah:
return prediction_true return prediction_true
def check_dpi(self):
    """Return the resolution reported by ``identify`` for the input image.

    Runs the external ``identify`` program with the ``%x`` format on
    ``self.image_filename`` and converts the first reported value to an
    integer (fractional part truncated).

    Raises:
        ValueError: if ``identify`` cannot be started or prints nothing
            that parses as a number.
    """
    import subprocess  # local import so the block carries its own dependency

    self.logger.debug("enter check_dpi")
    try:
        # Pass an argument list instead of a shell string: file names with
        # spaces or shell metacharacters are handed over verbatim and are
        # never interpreted by a shell.
        result = subprocess.run(
            ['identify', '-format', '%x ', self.image_filename],
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        # Keep ValueError as the single failure mode, as with the old
        # empty-output parse error.
        raise ValueError("could not run 'identify': %s" % err) from err
    # The format string is emitted once per frame; use the first value so
    # multi-frame images do not break the float conversion.
    values = result.stdout.split()
    if not values:
        raise ValueError(
            "identify reported no resolution for %r" % (self.image_filename,))
    return int(float(values[0]))
def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred): def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
self.logger.debug("enter calculate_width_height_by_columns") self.logger.debug("enter calculate_width_height_by_columns")
if num_col == 1 and width_early < 1100: if num_col == 1 and width_early < 1100:
@ -389,7 +377,7 @@ class eynollah:
def resize_and_enhance_image_with_column_classifier(self): def resize_and_enhance_image_with_column_classifier(self):
self.logger.debug("enter resize_and_enhance_image_with_column_classifier") self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
dpi = self.check_dpi() dpi = check_dpi(self.image_filename)
self.logger.info("Detected %s DPI" % dpi) self.logger.info("Detected %s DPI" % dpi)
img = cv2.imread(self.image_filename) img = cv2.imread(self.image_filename)

@ -0,0 +1,24 @@
from PIL import Image
import numpy as np
from ocrd_models import OcrdExif
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor, imread
# from sbb_binarization
def cv2pil(img):
    """Convert an array image to a PIL image.

    The array is cast to ``uint8`` and then wrapped with
    ``Image.fromarray``.
    """
    as_uint8 = img.astype('uint8')
    return Image.fromarray(as_uint8)
def pil2cv(img):
    """Convert a PIL image into a BGR array via ``cvtColor``.

    Images in mode ``'1'`` or ``'L'`` are expanded with ``COLOR_GRAY2BGR``;
    every other mode is converted with ``COLOR_RGB2BGR``. Mode ``'1'``
    pixel data is cast to ``uint8`` before the conversion.
    """
    # from ocrd/workspace.py
    if img.mode in ('1', 'L'):
        conversion_code = COLOR_GRAY2BGR
    else:
        conversion_code = COLOR_RGB2BGR
    pixels = np.array(img)
    if img.mode == '1':
        pixels = pixels.astype('uint8')
    return cvtColor(pixels, conversion_code)
def check_dpi(image_filename):
    """Return the resolution of the image at *image_filename* in DPI.

    The resolution and its unit are read through ``OcrdExif``. When the
    unit is ``'cm'`` the value is converted from pixels per centimetre to
    pixels per inch.

    Args:
        image_filename: path of the image file to inspect.

    Returns:
        The resolution in pixels per inch, rounded to the nearest integer.
    """
    # Use a context manager so the file handle is closed once the
    # metadata has been read.
    with Image.open(image_filename) as img:
        exif = OcrdExif(img)
        resolution = exif.resolution
        unit = exif.resolutionUnit
    if unit == 'cm':
        # One inch spans 2.54 cm, so pixels/cm * 2.54 gives pixels/inch.
        resolution *= 2.54
    # Round rather than truncate: e.g. 118.11 px/cm * 2.54 = 299.9994,
    # which should be reported as 300, not 299.
    return int(round(resolution))

@ -0,0 +1,10 @@
from pathlib import Path
from sbb_newspapers_org_image.utils.pil_cv2 import check_dpi
from tests.base import main
def test_dpi():
    """check_dpi must report 300 for the sample TIFF in ``resources``."""
    resources_dir = Path(__file__).parent / 'resources'
    sample_path = resources_dir / 'kant_aufklaerung_1784_0020.tif'
    detected = check_dpi(str(sample_path))
    assert 300 == detected
# Script entry point: when this module is executed directly, pass its own
# path to the ``main`` helper imported from ``tests.base``.
if __name__ == '__main__':
    main(__file__)
Loading…
Cancel
Save