diff --git a/src/eynollah/processor.py b/src/eynollah/processor.py index 3380b24..f4db854 100644 --- a/src/eynollah/processor.py +++ b/src/eynollah/processor.py @@ -16,6 +16,27 @@ class EynollahProcessor(Processor): raise ValueError("Error: You set parameter 'textline_light' to enable light textline detection but parameter 'light_mode' is not enabled") def process_page_pcgts(self, *input_pcgts: Optional[OcrdPage], page_id: Optional[str] = None) -> OcrdPageResult: + """ + Performs cropping, region and line segmentation with Eynollah. + + For each page, open and deserialize PAGE input file (from existing + PAGE file in the input fileGrp, or generated from image file). + Retrieve its respective page-level image (ignoring annotation that + already added `binarized`, `cropped` or `deskewed` features). + + Set up Eynollah to detect regions and lines, and add each one to the + page, respectively. + + \b + - If ``tables``, try to detect table blocks and add them as TableRegion. + - If ``full_layout``, then in addition to paragraphs and marginals, also + try to detect drop capitals and headings. + - If ``ignore_page_extraction``, then attempt no cropping of the page. + - If ``curved_line``, then compute contour polygons for text lines + instead of simple bounding boxes. + + Produce a new output file by serialising the resulting hierarchy. + """ assert input_pcgts assert input_pcgts[0] assert self.parameter