|
|
@ -6,14 +6,18 @@
|
|
|
|
document layout analysis (segmentation) with output in PAGE-XML
|
|
|
|
document layout analysis (segmentation) with output in PAGE-XML
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from logging import Logger
|
|
|
|
import math
|
|
|
|
import math
|
|
|
|
import os
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
import time
|
|
|
|
|
|
|
|
from typing import Optional
|
|
|
|
import warnings
|
|
|
|
import warnings
|
|
|
|
from pathlib import Path
|
|
|
|
from pathlib import Path
|
|
|
|
from multiprocessing import Process, Queue, cpu_count
|
|
|
|
from multiprocessing import Process, Queue, cpu_count
|
|
|
|
import gc
|
|
|
|
import gc
|
|
|
|
|
|
|
|
from PIL.Image import Image
|
|
|
|
|
|
|
|
from ocrd import OcrdPage
|
|
|
|
from ocrd_utils import getLogger
|
|
|
|
from ocrd_utils import getLogger
|
|
|
|
import cv2
|
|
|
|
import cv2
|
|
|
|
import numpy as np
|
|
|
|
import numpy as np
|
|
|
@ -142,32 +146,32 @@ class PatchEncoder(layers.Layer):
|
|
|
|
class Eynollah:
|
|
|
|
class Eynollah:
|
|
|
|
def __init__(
|
|
|
|
def __init__(
|
|
|
|
self,
|
|
|
|
self,
|
|
|
|
dir_models,
|
|
|
|
dir_models : str,
|
|
|
|
image_filename=None,
|
|
|
|
logger : Logger,
|
|
|
|
image_pil=None,
|
|
|
|
image_filename : Optional[str] = None,
|
|
|
|
image_filename_stem=None,
|
|
|
|
image_pil : Optional[Image] = None,
|
|
|
|
dir_out=None,
|
|
|
|
image_filename_stem : Optional[str] = None,
|
|
|
|
dir_in=None,
|
|
|
|
dir_out : Optional[str] = None,
|
|
|
|
dir_of_cropped_images=None,
|
|
|
|
dir_in : Optional[str] = None,
|
|
|
|
dir_of_layout=None,
|
|
|
|
dir_of_cropped_images : Optional[str] = None,
|
|
|
|
dir_of_deskewed=None,
|
|
|
|
dir_of_layout : Optional[str] = None,
|
|
|
|
dir_of_all=None,
|
|
|
|
dir_of_deskewed : Optional[str] = None,
|
|
|
|
dir_save_page=None,
|
|
|
|
dir_of_all : Optional[str] = None,
|
|
|
|
enable_plotting=False,
|
|
|
|
dir_save_page : Optional[str] = None,
|
|
|
|
allow_enhancement=False,
|
|
|
|
enable_plotting : bool = False,
|
|
|
|
curved_line=False,
|
|
|
|
allow_enhancement : bool = False,
|
|
|
|
textline_light=False,
|
|
|
|
curved_line : bool = False,
|
|
|
|
full_layout=False,
|
|
|
|
textline_light : bool = False,
|
|
|
|
tables=False,
|
|
|
|
full_layout : bool = False,
|
|
|
|
right2left=False,
|
|
|
|
tables : bool = False,
|
|
|
|
input_binary=False,
|
|
|
|
right2left : bool = False,
|
|
|
|
allow_scaling=False,
|
|
|
|
input_binary : bool = False,
|
|
|
|
headers_off=False,
|
|
|
|
allow_scaling : bool = False,
|
|
|
|
light_version=False,
|
|
|
|
headers_off : bool = False,
|
|
|
|
ignore_page_extraction=False,
|
|
|
|
light_version : bool = False,
|
|
|
|
override_dpi=None,
|
|
|
|
ignore_page_extraction : bool = False,
|
|
|
|
logger=None,
|
|
|
|
override_dpi : Optional[int] = None,
|
|
|
|
pcgts=None,
|
|
|
|
pcgts : Optional[OcrdPage] = None,
|
|
|
|
):
|
|
|
|
):
|
|
|
|
if not dir_in:
|
|
|
|
if not dir_in:
|
|
|
|
if image_pil:
|
|
|
|
if image_pil:
|
|
|
@ -213,7 +217,7 @@ class Eynollah:
|
|
|
|
curved_line=self.curved_line,
|
|
|
|
curved_line=self.curved_line,
|
|
|
|
textline_light = self.textline_light,
|
|
|
|
textline_light = self.textline_light,
|
|
|
|
pcgts=pcgts)
|
|
|
|
pcgts=pcgts)
|
|
|
|
self.logger = logger if logger else getLogger('eynollah')
|
|
|
|
self.logger = logger
|
|
|
|
self.dir_models = dir_models
|
|
|
|
self.dir_models = dir_models
|
|
|
|
|
|
|
|
|
|
|
|
self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
|
|
|
|
self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
|
|
|
|