mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-12-14 15:14:18 +01:00
Merge 21ec4fbfb5 into 3dcbb20cac
This commit is contained in:
commit
8e2d0f3179
3 changed files with 59 additions and 14 deletions
39
README.md
39
README.md
|
|
@ -50,10 +50,16 @@ For documentation on methods and models, have a look at [`models.md`](https://gi
|
||||||
In case you want to train your own model with Eynollah, have a look at [`train.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/train.md).
|
In case you want to train your own model with Eynollah, have a look at [`train.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/train.md).
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
The command-line interface can be called like this:
|
|
||||||
|
Eynollah has four key use cases: layout analysis, binarization, OCR, and machine-based reading order.
|
||||||
|
|
||||||
|
### Layout
|
||||||
|
The layout module is responsible for detecting layouts, identifying text lines, and determining reading order using either heuristic methods or a machine-based reading order detection model. It's important to note that this functionality should not be confused with the machine-based-reading-order use case. The latter, still under development, focuses specifically on determining the reading order for a given layout in an XML file. In contrast, layout detection takes an image as input, and after detecting the layout, it can also determine the reading order using a machine-based model.
|
||||||
|
|
||||||
|
The command-line interface for layout can be called like this:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
eynollah \
|
eynollah layout \
|
||||||
-i <single image file> | -di <directory containing image files> \
|
-i <single image file> | -di <directory containing image files> \
|
||||||
-o <output directory> \
|
-o <output directory> \
|
||||||
-m <directory containing model files> \
|
-m <directory containing model files> \
|
||||||
|
|
@ -66,6 +72,7 @@ The following options can be used to further configure the processing:
|
||||||
|-------------------|:-------------------------------------------------------------------------------|
|
|-------------------|:-------------------------------------------------------------------------------|
|
||||||
| `-fl` | full layout analysis including all steps and segmentation classes |
|
| `-fl` | full layout analysis including all steps and segmentation classes |
|
||||||
| `-light` | lighter and faster but simpler method for main region detection and deskewing |
|
| `-light` | lighter and faster but simpler method for main region detection and deskewing |
|
||||||
|
| `-tll` | use the light textline detection method (should be passed together with `-light`) |
|
||||||
| `-tab` | apply table detection |
|
| `-tab` | apply table detection |
|
||||||
| `-ae` | apply enhancement (the resulting image is saved to the output directory) |
|
| `-ae` | apply enhancement (the resulting image is saved to the output directory) |
|
||||||
| `-as` | apply scaling |
|
| `-as` | apply scaling |
|
||||||
|
|
@ -83,6 +90,34 @@ The following options can be used to further configure the processing:
|
||||||
If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals).
|
If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals).
|
||||||
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.
|
The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.
|
||||||
|
|
||||||
|
### Binarization
|
||||||
|
Document Image Binarization
|
||||||
|
|
||||||
|
The command-line interface for binarization of a single image can be called like this:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
eynollah binarization \
|
||||||
|
-m <path to directory containing model files> \
|
||||||
|
<input image> \
|
||||||
|
<output image>
|
||||||
|
```
|
||||||
|
|
||||||
|
and for processing all images in a directory like this:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
eynollah binarization \
|
||||||
|
-m <path to directory containing model files> \
|
||||||
|
-di <directory containing image files> \
|
||||||
|
-do <output directory>
|
||||||
|
```
|
||||||
|
|
||||||
|
### OCR
|
||||||
|
Under development
|
||||||
|
|
||||||
|
### Machine-based-reading-order
|
||||||
|
Under development
|
||||||
|
|
||||||
|
|
||||||
#### Use as OCR-D processor
|
#### Use as OCR-D processor
|
||||||
|
|
||||||
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli),
|
Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) [processor](https://ocr-d.de/en/spec/cli),
|
||||||
|
|
|
||||||
|
|
@ -4343,12 +4343,12 @@ class Eynollah:
|
||||||
polygons_lines_xml = []
|
polygons_lines_xml = []
|
||||||
contours_tables = []
|
contours_tables = []
|
||||||
ocr_all_textlines = None
|
ocr_all_textlines = None
|
||||||
conf_contours_textregions =None
|
conf_contours_textregions = [0]
|
||||||
pcgts = self.writer.build_pagexml_no_full_layout(
|
pcgts = self.writer.build_pagexml_no_full_layout(
|
||||||
cont_page, page_coord, order_text_new, id_of_texts_tot,
|
cont_page, page_coord, order_text_new, id_of_texts_tot,
|
||||||
all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
|
all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
|
||||||
all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
|
all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
|
||||||
cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
|
cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order)
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
||||||
#print("text region early -1 in %.1fs", time.time() - t0)
|
#print("text region early -1 in %.1fs", time.time() - t0)
|
||||||
|
|
|
||||||
|
|
@ -168,7 +168,7 @@ class EynollahXmlWriter():
|
||||||
with open(self.output_filename, 'w') as f:
|
with open(self.output_filename, 'w') as f:
|
||||||
f.write(to_xml(pcgts))
|
f.write(to_xml(pcgts))
|
||||||
|
|
||||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion):
|
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False):
|
||||||
self.logger.debug('enter build_pagexml_no_full_layout')
|
self.logger.debug('enter build_pagexml_no_full_layout')
|
||||||
|
|
||||||
# create the file structure
|
# create the file structure
|
||||||
|
|
@ -184,7 +184,7 @@ class EynollahXmlWriter():
|
||||||
|
|
||||||
for mm in range(len(found_polygons_text_region)):
|
for mm in range(len(found_polygons_text_region)):
|
||||||
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
||||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]),
|
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]),
|
||||||
)
|
)
|
||||||
#textregion.set_conf(conf_contours_textregion[mm])
|
#textregion.set_conf(conf_contours_textregion[mm])
|
||||||
page.add_TextRegion(textregion)
|
page.add_TextRegion(textregion)
|
||||||
|
|
@ -303,10 +303,20 @@ class EynollahXmlWriter():
|
||||||
|
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
||||||
def calculate_polygon_coords(self, contour, page_coord):
|
def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
|
||||||
self.logger.debug('enter calculate_polygon_coords')
|
self.logger.debug('enter calculate_polygon_coords')
|
||||||
coords = ''
|
coords = ''
|
||||||
for value_bbox in contour:
|
for value_bbox in contour:
|
||||||
|
if skip_layout_reading_order:
|
||||||
|
if len(value_bbox) == 2:
|
||||||
|
coords += str(int((value_bbox[0]) / self.scale_x))
|
||||||
|
coords += ','
|
||||||
|
coords += str(int((value_bbox[1]) / self.scale_y))
|
||||||
|
else:
|
||||||
|
coords += str(int((value_bbox[0][0]) / self.scale_x))
|
||||||
|
coords += ','
|
||||||
|
coords += str(int((value_bbox[0][1]) / self.scale_y))
|
||||||
|
else:
|
||||||
if len(value_bbox) == 2:
|
if len(value_bbox) == 2:
|
||||||
coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
|
coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
|
||||||
coords += ','
|
coords += ','
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue