Merge 21ec4fbfb5 into 3dcbb20cac

2025-12-14 07:04:21 +01:00 · 2025-05-07 12:04:07 +00:00 · 2025-05-07 12:04:07 +00:00 · 8e2d0f3179
commit 8e2d0f3179
parent 3dcbb20cac 21ec4fbfb5
3 changed files with 59 additions and 14 deletions
--- a/README.md
+++ b/README.md
@ -50,10 +50,16 @@ For documentation on methods and models, have a look at [`models.md`](https://gi
 In case you want to train your own model with Eynollah, have a look at [`train.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/train.md).

 ## Usage
-The command-line interface can be called like this:
+
+Eynollah has four key use cases: layout analysis, binarization, OCR, and machine-based reading order.
+
+### Layout
+The layout module is responsible for detecting layouts, identifying text lines, and determining reading order using either heuristic methods or a machine-based reading order detection model. It's important to note that this functionality should not be confused with the machine-based-reading-order use case. The latter, still under development, focuses specifically on determining the reading order for a given layout in an XML file. In contrast, layout detection takes an image as input, and after detecting the layout, it can also determine the reading order using a machine-based model.
+
+The command-line interface for layout can be called like this:

 ```sh
-eynollah \
+eynollah layout \
  -i <single image file> | -di <directory containing image files> \
  -o <output directory> \
  -m <directory containing model files> \
@ -66,6 +72,7 @@ The following options can be used to further configure the processing:
 |-------------------|:-------------------------------------------------------------------------------|
 | `-fl`             | full layout analysis including all steps and segmentation classes              |
 | `-light`          | lighter and faster but simpler method for main region detection and deskewing  |
+| `-tll`            | use light textline detection (should be passed together with `-light`)         |
 | `-tab`            | apply table detection                                                          |
 | `-ae`             | apply enhancement (the resulting image is saved to the output directory)       |
 | `-as`             | apply scaling                                                                  |
@ -83,6 +90,34 @@ The following options can be used to further configure the processing:
 If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals).
 The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.

+### Binarization
+Document Image Binarization
+
+The command-line interface for binarization of a single image can be called like this:
+
+```sh
+eynollah binarization \
+  -m <path to directory containing model files> \
+  <input image> \
+  <output image>
+```
+
+and for processing all images in a directory like this:
+
+```sh
+eynollah binarization \
+  -m <path to directory containing model files> \
+  -di <directory containing image files> \
+  -do <output directory>
+```
+
+### OCR
+Under development
+
+### Machine-based-reading-order
+Under development
+
+
 #### Use as OCR-D processor

 Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli),
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@ -4343,12 +4343,12 @@ class Eynollah:
            polygons_lines_xml = []
            contours_tables = []
            ocr_all_textlines = None
-            conf_contours_textregions =None
+            conf_contours_textregions = [0]
            pcgts = self.writer.build_pagexml_no_full_layout(
                cont_page, page_coord, order_text_new, id_of_texts_tot,
                all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
                all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
-                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
+                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order)
            return pcgts

        #print("text region early -1 in %.1fs", time.time() - t0)
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@ -168,7 +168,7 @@ class EynollahXmlWriter():
        with open(self.output_filename, 'w') as f:
            f.write(to_xml(pcgts))

-    def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion):
+    def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False):
        self.logger.debug('enter build_pagexml_no_full_layout')

        # create the file structure
@ -184,7 +184,7 @@ class EynollahXmlWriter():

        for mm in range(len(found_polygons_text_region)):
            textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
-                    Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]),
+                    Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]),
                    )
            #textregion.set_conf(conf_contours_textregion[mm])
            page.add_TextRegion(textregion)
@ -303,18 +303,28 @@ class EynollahXmlWriter():

        return pcgts

-    def calculate_polygon_coords(self, contour, page_coord):
+    def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
        self.logger.debug('enter calculate_polygon_coords')
        coords = ''
        for value_bbox in contour:
-            if len(value_bbox) == 2:
-                coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
-                coords += ','
-                coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y))
+            if skip_layout_reading_order:
+                if len(value_bbox) == 2:
+                    coords += str(int((value_bbox[0]) / self.scale_x))
+                    coords += ','
+                    coords += str(int((value_bbox[1]) / self.scale_y))
+                else:
+                    coords += str(int((value_bbox[0][0]) / self.scale_x))
+                    coords += ','
+                    coords += str(int((value_bbox[0][1]) / self.scale_y))
            else:
-                coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x))
-                coords += ','
-                coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y))
+                if len(value_bbox) == 2:
+                    coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
+                    coords += ','
+                    coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y))
+                else:
+                    coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x))
+                    coords += ','
+                    coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y))
            coords=coords + ' '
        return coords[:-1]