mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 19:59:56 +02:00
use Page.imageFilename directly for accurate DPI estimate
This commit is contained in:
parent
42ccb4711d
commit
2e8a3e3bee
2 changed files with 18 additions and 8 deletions
|
@ -354,9 +354,7 @@ class Eynollah:
|
||||||
|
|
||||||
def resize_and_enhance_image_with_column_classifier(self):
|
def resize_and_enhance_image_with_column_classifier(self):
|
||||||
self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
|
self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
|
||||||
if self.override_dpi:
|
dpi = self.override_dpi if self.override_dpi else check_dpi(self.imread())
|
||||||
return self.override_dpi
|
|
||||||
dpi = check_dpi(self.imread())
|
|
||||||
self.logger.info("Detected %s DPI", dpi)
|
self.logger.info("Detected %s DPI", dpi)
|
||||||
img = self.imread()
|
img = self.imread()
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
from json import loads
|
from json import loads
|
||||||
from pkg_resources import resource_string
|
from pkg_resources import resource_string
|
||||||
from tempfile import NamedTemporaryFile
|
from tempfile import NamedTemporaryFile
|
||||||
|
from pathlib import Path
|
||||||
from os.path import join
|
from os.path import join
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
from ocrd import Processor
|
from ocrd import Processor
|
||||||
from ocrd_modelfactory import page_from_file
|
from ocrd_modelfactory import page_from_file, exif_from_filename
|
||||||
|
from ocrd_models import OcrdFile, OcrdExif
|
||||||
from ocrd_models.ocrd_page import to_xml
|
from ocrd_models.ocrd_page import to_xml
|
||||||
from ocrd_utils import (
|
from ocrd_utils import (
|
||||||
getLogger,
|
getLogger,
|
||||||
|
@ -35,7 +39,15 @@ class EynollahProcessor(Processor):
|
||||||
pcgts = page_from_file(self.workspace.download_file(input_file))
|
pcgts = page_from_file(self.workspace.download_file(input_file))
|
||||||
self.add_metadata(pcgts)
|
self.add_metadata(pcgts)
|
||||||
page = pcgts.get_Page()
|
page = pcgts.get_Page()
|
||||||
page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
|
# XXX loses DPI information
|
||||||
|
# page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
|
||||||
|
self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename)))
|
||||||
|
if self.parameter['dpi'] <= 0:
|
||||||
|
exif = exif_from_filename(page.imageFilename)
|
||||||
|
dpi = exif.resolution
|
||||||
|
if exif.resolutionUnit == 'cm':
|
||||||
|
dpi /= 2.54
|
||||||
|
self.parameter['dpi'] = dpi
|
||||||
eynollah_kwargs = {
|
eynollah_kwargs = {
|
||||||
'dir_models': self.resolve_resource(self.parameter['models']),
|
'dir_models': self.resolve_resource(self.parameter['models']),
|
||||||
'allow_enhancement': self.parameter['allow_enhancement'],
|
'allow_enhancement': self.parameter['allow_enhancement'],
|
||||||
|
@ -43,11 +55,11 @@ class EynollahProcessor(Processor):
|
||||||
'full_layout': self.parameter['full_layout'],
|
'full_layout': self.parameter['full_layout'],
|
||||||
'allow_scaling': self.parameter['allow_scaling'],
|
'allow_scaling': self.parameter['allow_scaling'],
|
||||||
'headers_off': self.parameter['headers_off'],
|
'headers_off': self.parameter['headers_off'],
|
||||||
'override_dpi': self.parameter['dpi'] if self.parameter['dpi'] > 0 else None,
|
'override_dpi': self.parameter['dpi'],
|
||||||
'logger': LOG,
|
'logger': LOG,
|
||||||
'pcgts': pcgts,
|
'pcgts': pcgts,
|
||||||
'image_pil': page_image,
|
'image_filename': page.imageFilename
|
||||||
'image_filename': None}
|
}
|
||||||
Eynollah(**eynollah_kwargs).run()
|
Eynollah(**eynollah_kwargs).run()
|
||||||
file_id = make_file_id(input_file, self.output_file_grp)
|
file_id = make_file_id(input_file, self.output_file_grp)
|
||||||
self.workspace.add_file(
|
self.workspace.add_file(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue