mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 19:59:56 +02:00
check_dpi: use OcrdExif instead of identify callout
This commit is contained in:
parent
0a92543245
commit
8c603ae16d
3 changed files with 38 additions and 16 deletions
|
@ -12,8 +12,9 @@ import time
|
||||||
import warnings
|
import warnings
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from multiprocessing import Process, Queue, cpu_count
|
from multiprocessing import Process, Queue, cpu_count
|
||||||
from ocrd_utils import getLogger
|
|
||||||
|
|
||||||
|
from lxml import etree as ET
|
||||||
|
from ocrd_utils import getLogger
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
@ -27,14 +28,6 @@ import tensorflow as tf
|
||||||
tf.get_logger().setLevel("ERROR")
|
tf.get_logger().setLevel("ERROR")
|
||||||
warnings.filterwarnings("ignore")
|
warnings.filterwarnings("ignore")
|
||||||
|
|
||||||
from scipy.signal import find_peaks
|
|
||||||
from scipy.ndimage import gaussian_filter1d
|
|
||||||
from shapely import geometry
|
|
||||||
from lxml import etree as ET
|
|
||||||
from matplotlib import pyplot, transforms
|
|
||||||
import matplotlib.patches as mpatches
|
|
||||||
import imutils
|
|
||||||
|
|
||||||
from .utils.contour import (
|
from .utils.contour import (
|
||||||
contours_in_same_horizon,
|
contours_in_same_horizon,
|
||||||
filter_contours_area_of_image_interiors,
|
filter_contours_area_of_image_interiors,
|
||||||
|
@ -115,7 +108,7 @@ from .utils import (
|
||||||
)
|
)
|
||||||
|
|
||||||
from .utils.xml import create_page_xml
|
from .utils.xml import create_page_xml
|
||||||
|
from .utils.pil_cv2 import check_dpi
|
||||||
from .plot import EynollahPlotter
|
from .plot import EynollahPlotter
|
||||||
|
|
||||||
SLOPE_THRESHOLD = 0.13
|
SLOPE_THRESHOLD = 0.13
|
||||||
|
@ -275,11 +268,6 @@ class eynollah:
|
||||||
|
|
||||||
return prediction_true
|
return prediction_true
|
||||||
|
|
||||||
def check_dpi(self):
|
|
||||||
self.logger.debug("enter check_dpi")
|
|
||||||
dpi = os.popen('identify -format "%x " ' + self.image_filename).read()
|
|
||||||
return int(float(dpi))
|
|
||||||
|
|
||||||
def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
|
def calculate_width_height_by_columns(self, img, num_col, width_early, label_p_pred):
|
||||||
self.logger.debug("enter calculate_width_height_by_columns")
|
self.logger.debug("enter calculate_width_height_by_columns")
|
||||||
if num_col == 1 and width_early < 1100:
|
if num_col == 1 and width_early < 1100:
|
||||||
|
@ -389,7 +377,7 @@ class eynollah:
|
||||||
|
|
||||||
def resize_and_enhance_image_with_column_classifier(self):
|
def resize_and_enhance_image_with_column_classifier(self):
|
||||||
self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
|
self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
|
||||||
dpi = self.check_dpi()
|
dpi = check_dpi(self.image_filename)
|
||||||
self.logger.info("Detected %s DPI" % dpi)
|
self.logger.info("Detected %s DPI" % dpi)
|
||||||
img = cv2.imread(self.image_filename)
|
img = cv2.imread(self.image_filename)
|
||||||
|
|
||||||
|
|
24
sbb_newspapers_org_image/utils/pil_cv2.py
Normal file
24
sbb_newspapers_org_image/utils/pil_cv2.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
from PIL import Image
|
||||||
|
import numpy as np
|
||||||
|
from ocrd_models import OcrdExif
|
||||||
|
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor, imread
|
||||||
|
|
||||||
|
# from sbb_binarization
|
||||||
|
|
||||||
|
def cv2pil(img):
    """Convert an array-like image (as used with OpenCV) to a PIL image.

    The input is cast to ``uint8`` before it is handed to
    ``Image.fromarray``.
    """
    as_uint8 = img.astype('uint8')
    return Image.fromarray(as_uint8)
|
||||||
|
|
||||||
|
def pil2cv(img):
    """Convert a PIL image to a BGR array via ``cvtColor``.

    Images in mode ``'1'`` or ``'L'`` are converted with
    ``COLOR_GRAY2BGR``; every other mode is converted with
    ``COLOR_RGB2BGR``. Mode ``'1'`` pixel data is cast to ``uint8``
    before the conversion.
    """
    # from ocrd/workspace.py
    if img.mode in ('1', 'L'):
        conversion = COLOR_GRAY2BGR
    else:
        conversion = COLOR_RGB2BGR

    pixels = np.array(img)
    if img.mode == '1':
        pixels = pixels.astype('uint8')

    return cvtColor(pixels, conversion)
|
||||||
|
|
||||||
|
def check_dpi(image_filename):
    """Return the resolution of an image file in dots per inch.

    The resolution and its unit are read from the image metadata through
    ``OcrdExif``. When the unit is reported as ``'cm'`` the value is
    converted to pixels per inch; otherwise it is returned as is.

    Args:
        image_filename: Path of the image file to inspect.

    Returns:
        The resolution truncated to an ``int``.
    """
    # Use a context manager so the file handle is released as soon as the
    # metadata has been extracted.
    with Image.open(image_filename) as pil_image:
        exif = OcrdExif(pil_image)

    resolution = exif.resolution
    if exif.resolutionUnit == 'cm':
        # pixels/cm -> pixels/inch: one inch is 2.54 cm, so there are 2.54
        # times as many pixels per inch as per centimetre.
        resolution *= 2.54
    return int(resolution)
|
||||||
|
|
10
tests/test_dpi.py
Normal file
10
tests/test_dpi.py
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
from pathlib import Path
|
||||||
|
from sbb_newspapers_org_image.utils.pil_cv2 import check_dpi
|
||||||
|
from tests.base import main
|
||||||
|
|
||||||
|
def test_dpi():
    """check_dpi reports 300 for the sample TIFF in the resources folder."""
    resources_dir = Path(__file__).parent / 'resources'
    sample_image = resources_dir / 'kant_aufklaerung_1784_0020.tif'
    detected = check_dpi(str(sample_image))
    assert detected == 300
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(__file__)
|
Loading…
Add table
Add a link
Reference in a new issue