mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 19:59:56 +02:00
commit
d5be8aece3
11 changed files with 211 additions and 59 deletions
1
ocrd-tool.json
Symbolic link
1
ocrd-tool.json
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
qurator/eynollah/ocrd-tool.json
|
|
@ -117,20 +117,19 @@ def main(
|
||||||
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa or -si")
|
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa or -si")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
eynollah = Eynollah(
|
eynollah = Eynollah(
|
||||||
image,
|
image_filename=image,
|
||||||
None,
|
dir_out=out,
|
||||||
out,
|
dir_models=model,
|
||||||
model,
|
dir_of_cropped_images=save_images,
|
||||||
save_images,
|
dir_of_layout=save_layout,
|
||||||
save_layout,
|
dir_of_deskewed=save_deskewed,
|
||||||
save_deskewed,
|
dir_of_all=save_all,
|
||||||
save_all,
|
enable_plotting=enable_plotting,
|
||||||
enable_plotting,
|
allow_enhancement=allow_enhancement,
|
||||||
allow_enhancement,
|
curved_line=curved_line,
|
||||||
curved_line,
|
full_layout=full_layout,
|
||||||
full_layout,
|
allow_scaling=allow_scaling,
|
||||||
allow_scaling,
|
headers_off=headers_off,
|
||||||
headers_off,
|
|
||||||
)
|
)
|
||||||
pcgts = eynollah.run()
|
pcgts = eynollah.run()
|
||||||
eynollah.writer.write_pagexml(pcgts)
|
eynollah.writer.write_pagexml(pcgts)
|
||||||
|
|
|
@ -65,7 +65,7 @@ from .utils import (
|
||||||
order_of_regions,
|
order_of_regions,
|
||||||
find_number_of_columns_in_document,
|
find_number_of_columns_in_document,
|
||||||
return_boxes_of_images_by_order_of_reading_new)
|
return_boxes_of_images_by_order_of_reading_new)
|
||||||
from .utils.pil_cv2 import check_dpi
|
from .utils.pil_cv2 import check_dpi, pil2cv
|
||||||
from .utils.xml import order_and_id_of_texts
|
from .utils.xml import order_and_id_of_texts
|
||||||
from .plot import EynollahPlotter
|
from .plot import EynollahPlotter
|
||||||
from .writer import EynollahXmlWriter
|
from .writer import EynollahXmlWriter
|
||||||
|
@ -79,10 +79,11 @@ KERNEL = np.ones((5, 5), np.uint8)
|
||||||
class Eynollah:
|
class Eynollah:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
image_filename,
|
|
||||||
image_filename_stem,
|
|
||||||
dir_out,
|
|
||||||
dir_models,
|
dir_models,
|
||||||
|
image_filename,
|
||||||
|
image_pil=None,
|
||||||
|
image_filename_stem=None,
|
||||||
|
dir_out=None,
|
||||||
dir_of_cropped_images=None,
|
dir_of_cropped_images=None,
|
||||||
dir_of_layout=None,
|
dir_of_layout=None,
|
||||||
dir_of_deskewed=None,
|
dir_of_deskewed=None,
|
||||||
|
@ -92,30 +93,36 @@ class Eynollah:
|
||||||
curved_line=False,
|
curved_line=False,
|
||||||
full_layout=False,
|
full_layout=False,
|
||||||
allow_scaling=False,
|
allow_scaling=False,
|
||||||
headers_off=False
|
headers_off=False,
|
||||||
|
override_dpi=None,
|
||||||
|
logger=None,
|
||||||
|
pcgts=None,
|
||||||
):
|
):
|
||||||
|
if image_pil:
|
||||||
|
self._imgs = self._cache_images(image_pil=image_pil)
|
||||||
|
else:
|
||||||
|
self._imgs = self._cache_images(image_filename=image_filename)
|
||||||
|
if override_dpi:
|
||||||
|
self.dpi = override_dpi
|
||||||
self.image_filename = image_filename
|
self.image_filename = image_filename
|
||||||
self.dir_out = dir_out
|
self.dir_out = dir_out
|
||||||
self.image_filename_stem = image_filename_stem
|
|
||||||
self.allow_enhancement = allow_enhancement
|
self.allow_enhancement = allow_enhancement
|
||||||
self.curved_line = curved_line
|
self.curved_line = curved_line
|
||||||
self.full_layout = full_layout
|
self.full_layout = full_layout
|
||||||
self.allow_scaling = allow_scaling
|
self.allow_scaling = allow_scaling
|
||||||
self.headers_off = headers_off
|
self.headers_off = headers_off
|
||||||
if not self.image_filename_stem:
|
|
||||||
self.image_filename_stem = Path(Path(image_filename).name).stem
|
|
||||||
self.plotter = None if not enable_plotting else EynollahPlotter(
|
self.plotter = None if not enable_plotting else EynollahPlotter(
|
||||||
dir_of_all=dir_of_all,
|
dir_of_all=dir_of_all,
|
||||||
dir_of_deskewed=dir_of_deskewed,
|
dir_of_deskewed=dir_of_deskewed,
|
||||||
dir_of_cropped_images=dir_of_cropped_images,
|
dir_of_cropped_images=dir_of_cropped_images,
|
||||||
dir_of_layout=dir_of_layout,
|
dir_of_layout=dir_of_layout,
|
||||||
image_filename=image_filename,
|
image_filename_stem=Path(Path(image_filename).name).stem)
|
||||||
image_filename_stem=self.image_filename_stem)
|
|
||||||
self.writer = EynollahXmlWriter(
|
self.writer = EynollahXmlWriter(
|
||||||
dir_out=self.dir_out,
|
dir_out=self.dir_out,
|
||||||
image_filename=self.image_filename,
|
image_filename=self.image_filename,
|
||||||
curved_line=self.curved_line)
|
curved_line=self.curved_line,
|
||||||
self.logger = getLogger('eynollah')
|
pcgts=pcgts)
|
||||||
|
self.logger = logger if logger else getLogger('eynollah')
|
||||||
self.dir_models = dir_models
|
self.dir_models = dir_models
|
||||||
|
|
||||||
self.model_dir_of_enhancement = dir_models + "/model_enhancement.h5"
|
self.model_dir_of_enhancement = dir_models + "/model_enhancement.h5"
|
||||||
|
@ -128,7 +135,18 @@ class Eynollah:
|
||||||
self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5"
|
self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5"
|
||||||
self.model_textline_dir = dir_models + "/model_textline_newspapers.h5"
|
self.model_textline_dir = dir_models + "/model_textline_newspapers.h5"
|
||||||
|
|
||||||
self._imgs = {}
|
def _cache_images(self, image_filename=None, image_pil=None):
|
||||||
|
ret = {}
|
||||||
|
if image_filename:
|
||||||
|
ret['img'] = cv2.imread(image_filename)
|
||||||
|
self.dpi = check_dpi(image_filename)
|
||||||
|
else:
|
||||||
|
ret['img'] = pil2cv(image_pil)
|
||||||
|
self.dpi = check_dpi(image_pil)
|
||||||
|
ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY)
|
||||||
|
for prefix in ('', '_grayscale'):
|
||||||
|
ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8)
|
||||||
|
return ret
|
||||||
|
|
||||||
def imread(self, grayscale=False, uint8=True):
|
def imread(self, grayscale=False, uint8=True):
|
||||||
key = 'img'
|
key = 'img'
|
||||||
|
@ -136,16 +154,9 @@ class Eynollah:
|
||||||
key += '_grayscale'
|
key += '_grayscale'
|
||||||
if uint8:
|
if uint8:
|
||||||
key += '_uint8'
|
key += '_uint8'
|
||||||
if key not in self._imgs:
|
|
||||||
if grayscale:
|
|
||||||
img = cv2.imread(self.image_filename, cv2.IMREAD_GRAYSCALE)
|
|
||||||
else:
|
|
||||||
img = cv2.imread(self.image_filename)
|
|
||||||
if uint8:
|
|
||||||
img = img.astype(np.uint8)
|
|
||||||
self._imgs[key] = img
|
|
||||||
return self._imgs[key].copy()
|
return self._imgs[key].copy()
|
||||||
|
|
||||||
|
|
||||||
def predict_enhancement(self, img):
|
def predict_enhancement(self, img):
|
||||||
self.logger.debug("enter predict_enhancement")
|
self.logger.debug("enter predict_enhancement")
|
||||||
model_enhancement, session_enhancement = self.start_new_session_and_model(self.model_dir_of_enhancement)
|
model_enhancement, session_enhancement = self.start_new_session_and_model(self.model_dir_of_enhancement)
|
||||||
|
@ -346,10 +357,7 @@ class Eynollah:
|
||||||
|
|
||||||
def resize_and_enhance_image_with_column_classifier(self):
|
def resize_and_enhance_image_with_column_classifier(self):
|
||||||
self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
|
self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
|
||||||
try:
|
dpi = self.dpi
|
||||||
dpi = check_dpi(self.image_filename)
|
|
||||||
except:
|
|
||||||
dpi = 230
|
|
||||||
self.logger.info("Detected %s DPI", dpi)
|
self.logger.info("Detected %s DPI", dpi)
|
||||||
img = self.imread()
|
img = self.imread()
|
||||||
|
|
||||||
|
@ -1503,7 +1511,6 @@ class Eynollah:
|
||||||
scale = 1
|
scale = 1
|
||||||
if is_image_enhanced:
|
if is_image_enhanced:
|
||||||
if self.allow_enhancement:
|
if self.allow_enhancement:
|
||||||
cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem) + ".tif", img_res)
|
|
||||||
img_res = img_res.astype(np.uint8)
|
img_res = img_res.astype(np.uint8)
|
||||||
self.get_image_and_scales(img_org, img_res, scale)
|
self.get_image_and_scales(img_org, img_res, scale)
|
||||||
else:
|
else:
|
||||||
|
|
54
qurator/eynollah/ocrd-tool.json
Normal file
54
qurator/eynollah/ocrd-tool.json
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
{
|
||||||
|
"version": "0.0.1",
|
||||||
|
"git_url": "https://github.com/qurator-spk/eynollah",
|
||||||
|
"tools": {
|
||||||
|
"ocrd-eynollah-segment": {
|
||||||
|
"executable": "ocrd-eynollah-segment",
|
||||||
|
"categories": ["Layout analysis"],
|
||||||
|
"description": "Segment page into regions and lines and do reading order detection with eynollah",
|
||||||
|
"input_file_grp": ["OCR-D-IMG", "OCR-D-SEG-PAGE", "OCR-D-GT-SEG-PAGE"],
|
||||||
|
"output_file_grp": ["OCR-D-SEG-LINE"],
|
||||||
|
"steps": ["layout/segmentation/region", "layout/segmentation/line"],
|
||||||
|
"parameters": {
|
||||||
|
"models": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "file",
|
||||||
|
"cacheable": true,
|
||||||
|
"description": "Path to directory containing models to be used (See https://qurator-data.de/eynollah)",
|
||||||
|
"required": true
|
||||||
|
},
|
||||||
|
"dpi": {
|
||||||
|
"type": "number",
|
||||||
|
"format": "float",
|
||||||
|
"description": "pixel density in dots per inch (overrides any meta-data in the images); ignored if <= 0 (with fall-back 230)",
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"full_layout": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": true,
|
||||||
|
"description": "Try to detect all element subtypes, including drop-caps and headings"
|
||||||
|
},
|
||||||
|
"curved_line": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false,
|
||||||
|
"description": "try to return contour of textlines instead of just rectangle bounding box. Needs more processing time"
|
||||||
|
},
|
||||||
|
"allow_enhancement": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": true,
|
||||||
|
"description": "if the input image has less than 300 DPI, then upscale and enhance"
|
||||||
|
},
|
||||||
|
"allow_scaling": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false,
|
||||||
|
"description": "check the resolution against the number of detected columns and if needed, scale the image up or down during layout detection (heuristic to improve quality and performance)"
|
||||||
|
},
|
||||||
|
"headers_off": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false,
|
||||||
|
"description": "ignore the special role of headings during reading order detection"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
11
qurator/eynollah/ocrd_cli.py
Normal file
11
qurator/eynollah/ocrd_cli.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
from .processor import EynollahProcessor
|
||||||
|
from click import command
|
||||||
|
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||||
|
|
||||||
|
@command()
|
||||||
|
@ocrd_cli_options
|
||||||
|
def main(*args, **kwargs):
|
||||||
|
return ocrd_cli_wrap_processor(EynollahProcessor, *args, **kwargs)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
|
@ -21,7 +21,6 @@ class EynollahPlotter():
|
||||||
dir_of_deskewed,
|
dir_of_deskewed,
|
||||||
dir_of_layout,
|
dir_of_layout,
|
||||||
dir_of_cropped_images,
|
dir_of_cropped_images,
|
||||||
image_filename,
|
|
||||||
image_filename_stem,
|
image_filename_stem,
|
||||||
image_org=None,
|
image_org=None,
|
||||||
scale_x=1,
|
scale_x=1,
|
||||||
|
@ -31,7 +30,6 @@ class EynollahPlotter():
|
||||||
self.dir_of_layout = dir_of_layout
|
self.dir_of_layout = dir_of_layout
|
||||||
self.dir_of_cropped_images = dir_of_cropped_images
|
self.dir_of_cropped_images = dir_of_cropped_images
|
||||||
self.dir_of_deskewed = dir_of_deskewed
|
self.dir_of_deskewed = dir_of_deskewed
|
||||||
self.image_filename = image_filename
|
|
||||||
self.image_filename_stem = image_filename_stem
|
self.image_filename_stem = image_filename_stem
|
||||||
# XXX TODO hacky these cannot be set at init time
|
# XXX TODO hacky these cannot be set at init time
|
||||||
self.image_org = image_org
|
self.image_org = image_org
|
||||||
|
|
65
qurator/eynollah/processor.py
Normal file
65
qurator/eynollah/processor.py
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
from json import loads
|
||||||
|
from pkg_resources import resource_string
|
||||||
|
from tempfile import NamedTemporaryFile
|
||||||
|
from pathlib import Path
|
||||||
|
from os.path import join
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from ocrd import Processor
|
||||||
|
from ocrd_modelfactory import page_from_file, exif_from_filename
|
||||||
|
from ocrd_models import OcrdFile, OcrdExif
|
||||||
|
from ocrd_models.ocrd_page import to_xml
|
||||||
|
from ocrd_utils import (
|
||||||
|
getLogger,
|
||||||
|
MIMETYPE_PAGE,
|
||||||
|
assert_file_grp_cardinality,
|
||||||
|
make_file_id
|
||||||
|
)
|
||||||
|
|
||||||
|
from .eynollah import Eynollah
|
||||||
|
from .utils.pil_cv2 import pil2cv
|
||||||
|
|
||||||
|
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
|
||||||
|
|
||||||
|
class EynollahProcessor(Processor):
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd-eynollah-segment']
|
||||||
|
kwargs['version'] = OCRD_TOOL['version']
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def process(self):
|
||||||
|
LOG = getLogger('eynollah')
|
||||||
|
assert_file_grp_cardinality(self.input_file_grp, 1)
|
||||||
|
assert_file_grp_cardinality(self.output_file_grp, 1)
|
||||||
|
for n, input_file in enumerate(self.input_files):
|
||||||
|
page_id = input_file.pageId or input_file.ID
|
||||||
|
LOG.info("INPUT FILE %s (%d/%d) ", page_id, n + 1, len(self.input_files))
|
||||||
|
pcgts = page_from_file(self.workspace.download_file(input_file))
|
||||||
|
self.add_metadata(pcgts)
|
||||||
|
page = pcgts.get_Page()
|
||||||
|
# XXX loses DPI information
|
||||||
|
# page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
|
||||||
|
image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename
|
||||||
|
eynollah_kwargs = {
|
||||||
|
'dir_models': self.resolve_resource(self.parameter['models']),
|
||||||
|
'allow_enhancement': self.parameter['allow_enhancement'],
|
||||||
|
'curved_line': self.parameter['curved_line'],
|
||||||
|
'full_layout': self.parameter['full_layout'],
|
||||||
|
'allow_scaling': self.parameter['allow_scaling'],
|
||||||
|
'headers_off': self.parameter['headers_off'],
|
||||||
|
'override_dpi': self.parameter['dpi'],
|
||||||
|
'logger': LOG,
|
||||||
|
'pcgts': pcgts,
|
||||||
|
'image_filename': image_filename
|
||||||
|
}
|
||||||
|
Eynollah(**eynollah_kwargs).run()
|
||||||
|
file_id = make_file_id(input_file, self.output_file_grp)
|
||||||
|
self.workspace.add_file(
|
||||||
|
ID=file_id,
|
||||||
|
file_grp=self.output_file_grp,
|
||||||
|
pageId=page_id,
|
||||||
|
mimetype=MIMETYPE_PAGE,
|
||||||
|
local_filename=join(self.output_file_grp, file_id) + '.xml',
|
||||||
|
content=to_xml(pcgts))
|
|
@ -1,12 +1,12 @@
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from ocrd_models import OcrdExif
|
from ocrd_models import OcrdExif
|
||||||
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor, imread
|
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, COLOR_BGR2RGB, cvtColor, imread
|
||||||
|
|
||||||
# from sbb_binarization
|
# from sbb_binarization
|
||||||
|
|
||||||
def cv2pil(img):
|
def cv2pil(img):
|
||||||
return Image.fromarray(img.astype('uint8'))
|
return Image.fromarray(np.array(cvtColor(img, COLOR_BGR2RGB)))
|
||||||
|
|
||||||
def pil2cv(img):
|
def pil2cv(img):
|
||||||
# from ocrd/workspace.py
|
# from ocrd/workspace.py
|
||||||
|
@ -14,11 +14,21 @@ def pil2cv(img):
|
||||||
pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img)
|
pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img)
|
||||||
return cvtColor(pil_as_np_array, color_conversion)
|
return cvtColor(pil_as_np_array, color_conversion)
|
||||||
|
|
||||||
def check_dpi(image_filename):
|
def check_dpi(img):
|
||||||
exif = OcrdExif(Image.open(image_filename))
|
try:
|
||||||
print(exif.to_xml())
|
if isinstance(img, Image.__class__):
|
||||||
resolution = exif.resolution
|
pil_image = img
|
||||||
if exif.resolutionUnit == 'cm':
|
elif isinstance(img, str):
|
||||||
resolution /= 2.54
|
pil_image = Image.open(img)
|
||||||
return int(resolution)
|
else:
|
||||||
|
pil_image = cv2pil(img)
|
||||||
|
exif = OcrdExif(pil_image)
|
||||||
|
resolution = exif.resolution
|
||||||
|
if resolution == 1:
|
||||||
|
raise Exception()
|
||||||
|
if exif.resolutionUnit == 'cm':
|
||||||
|
resolution /= 2.54
|
||||||
|
return int(resolution)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
return 230
|
||||||
|
|
|
@ -28,14 +28,17 @@ class EynollahXmlWriter():
|
||||||
self.counter = EynollahIdCounter()
|
self.counter = EynollahIdCounter()
|
||||||
self.dir_out = dir_out
|
self.dir_out = dir_out
|
||||||
self.image_filename = image_filename
|
self.image_filename = image_filename
|
||||||
self.image_filename_stem = Path(Path(image_filename).name).stem
|
|
||||||
self.curved_line = curved_line
|
self.curved_line = curved_line
|
||||||
self.pcgts = pcgts if pcgts else PcGtsType()
|
self.pcgts = pcgts
|
||||||
self.scale_x = None # XXX set outside __init__
|
self.scale_x = None # XXX set outside __init__
|
||||||
self.scale_y = None # XXX set outside __init__
|
self.scale_y = None # XXX set outside __init__
|
||||||
self.height_org = None # XXX set outside __init__
|
self.height_org = None # XXX set outside __init__
|
||||||
self.width_org = None # XXX set outside __init__
|
self.width_org = None # XXX set outside __init__
|
||||||
|
|
||||||
|
@property
|
||||||
|
def image_filename_stem(self):
|
||||||
|
return Path(Path(self.image_filename).name).stem
|
||||||
|
|
||||||
def calculate_page_coords(self, cont_page):
|
def calculate_page_coords(self, cont_page):
|
||||||
self.logger.debug('enter calculate_page_coords')
|
self.logger.debug('enter calculate_page_coords')
|
||||||
points_page_print = ""
|
points_page_print = ""
|
||||||
|
@ -141,7 +144,7 @@ class EynollahXmlWriter():
|
||||||
self.logger.debug('enter build_pagexml_no_full_layout')
|
self.logger.debug('enter build_pagexml_no_full_layout')
|
||||||
|
|
||||||
# create the file structure
|
# create the file structure
|
||||||
pcgts = create_page_xml(self.image_filename, self.height_org, self.width_org)
|
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
|
||||||
page = pcgts.get_Page()
|
page = pcgts.get_Page()
|
||||||
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
||||||
|
|
||||||
|
@ -181,7 +184,7 @@ class EynollahXmlWriter():
|
||||||
self.logger.debug('enter build_pagexml_full_layout')
|
self.logger.debug('enter build_pagexml_full_layout')
|
||||||
|
|
||||||
# create the file structure
|
# create the file structure
|
||||||
pcgts = create_page_xml(self.image_filename, self.height_org, self.width_org)
|
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
|
||||||
page = pcgts.get_Page()
|
page = pcgts.get_Page()
|
||||||
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
|
||||||
|
|
||||||
|
|
5
setup.py
5
setup.py
|
@ -13,10 +13,13 @@ setup(
|
||||||
namespace_packages=['qurator'],
|
namespace_packages=['qurator'],
|
||||||
packages=find_packages(exclude=['tests']),
|
packages=find_packages(exclude=['tests']),
|
||||||
install_requires=install_requires,
|
install_requires=install_requires,
|
||||||
|
package_data={
|
||||||
|
'': ['*.json']
|
||||||
|
},
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
'eynollah=qurator.eynollah.cli:main',
|
'eynollah=qurator.eynollah.cli:main',
|
||||||
# 'ocrd-eynollah=qurator.eynollah.ocrd_cli:cli',
|
'ocrd-eynollah-segment=qurator.eynollah.ocrd_cli:main',
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
|
import cv2
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from qurator.eynollah.utils.pil_cv2 import check_dpi
|
from qurator.eynollah.utils.pil_cv2 import check_dpi
|
||||||
from tests.base import main
|
from tests.base import main
|
||||||
|
|
||||||
def test_dpi():
|
def test_dpi():
|
||||||
fpath = Path(__file__).parent.joinpath('resources', 'kant_aufklaerung_1784_0020.tif')
|
fpath = str(Path(__file__).parent.joinpath('resources', 'kant_aufklaerung_1784_0020.tif'))
|
||||||
assert 300 == check_dpi(str(fpath))
|
assert 230 == check_dpi(cv2.imread(fpath))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(__file__)
|
main(__file__)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue