Merge pull request #138 from qurator-spk/extracting_images_only

Extracting images only
2026-01-29 13:46:58 +01:00 · 2024-11-05 22:10:51 +01:00 · 2024-11-05 22:10:51 +01:00 · bceeeb56c1
commit bceeeb56c1
parent 751b0102f7 f7e5fb917f
34 changed files with 778 additions and 723 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -1,51 +0,0 @@
-version: 2
-
-jobs:
-
-  build-python37:
-    machine:
-      - image: ubuntu-2004:2023.02.1
-
-    steps:
-      - checkout
-      - restore_cache:
-          keys:
-            - model-cache
-      - run: make models
-      - save_cache:
-          key: model-cache
-          paths:
-            models_eynollah.tar.gz
-            models_eynollah
-      - run:
-          name: "Set Python Version"
-          command: pyenv install -s 3.7.16 && pyenv global 3.7.16
-      - run: make install
-      - run: make smoke-test
-
-  build-python38:
-    machine:
-      - image: ubuntu-2004:2023.02.1
-    steps:
-      - checkout
-      - restore_cache:
-          keys:
-            - model-cache
-      - run: make models
-      - save_cache:
-          key: model-cache
-          paths:
-            models_eynollah.tar.gz
-            models_eynollah
-      - run:
-          name: "Set Python Version"
-          command: pyenv install -s 3.8.16 && pyenv global 3.8.16           
-      - run: make install
-      - run: make smoke-test
-
-workflows:
-  version: 2
-  build:
-    jobs:
-      # - build-python37
-      - build-python38
--- a/.github/workflows/test-eynollah.yml
+++ b/.github/workflows/test-eynollah.yml
@ -1,7 +1,7 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

-name: Python package
+name: Test

 on: [push]

@ -14,8 +14,8 @@ jobs:
        python-version: ['3.8', '3.9', '3.10', '3.11']

    steps:
-    - uses: actions/checkout@v2
-    - uses: actions/cache@v2
+    - uses: actions/checkout@v4
+    - uses: actions/cache@v4
      id: model_cache
      with:
        path: models_eynollah
@ -24,7 +24,7 @@ jobs:
      if: steps.model_cache.outputs.cache-hit != 'true'
      run: make models
    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,6 +5,14 @@ Versioned according to [Semantic Versioning](http://semver.org/).

 ## Unreleased

+## [0.3.1] - 2024-08-27
+
+Fixed:
+
+  * regression in OCR-D processor, #106
+  * Expected Ptrcv::UMat for argument 'contour', #110
+  * Memory usage explosion with very narrow images (e.g. book spine), #67
+
 ## [0.3.0] - 2023-05-13

 Changed:
@ -117,6 +125,8 @@ Fixed:
 Initial release

 <!-- link-labels -->
+[0.3.1]: ../../compare/v0.3.1...v0.3.0
+[0.3.0]: ../../compare/v0.3.0...v0.2.0
 [0.2.0]: ../../compare/v0.2.0...v0.1.0
 [0.1.0]: ../../compare/v0.1.0...v0.0.11
 [0.0.11]: ../../compare/v0.0.11...v0.0.10
--- a/8
+++ b/8
@ -22,16 +22,14 @@ help:
 models: models_eynollah

 models_eynollah: models_eynollah.tar.gz
-	# tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/'
-	# tar xf models_eynollah_renamed.tar.gz
-	# tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/'
 	tar xf models_eynollah.tar.gz

 models_eynollah.tar.gz:
 	# wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz'
 	# wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz'
-	# wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz'
-	wget https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
+	# wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed_savedmodel.tar.gz'
+	# wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz'
+	wget 'https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz'

 # Install with pip
 install:
--- a/README.md
+++ b/README.md
@ -1,10 +1,10 @@
 # Eynollah
-> Document Layout Analysis (segmentation) using pre-trained models and heuristics
+> Document Layout Analysis with Deep Learning and Heuristics

 [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/)
-[![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=shield)](https://circleci.com/gh/qurator-spk/eynollah)
 [![GH Actions Test](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml/badge.svg)](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml)
 [![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://opensource.org/license/apache-2-0/)
+[![DOI](https://img.shields.io/badge/DOI-10.1145%2F3604951.3605513-red)](https://doi.org/10.1145/3604951.3605513)

 ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg)

@ -14,16 +14,18 @@
 * Support for various image optimization operations:
  * cropping (border detection), binarization, deskewing, dewarping, scaling, enhancing, resizing
 * Text line segmentation to bounding boxes or polygons (contours) including for curved lines and vertical text
-* Detection of reading order
+* Detection of reading order (left-to-right or right-to-left)
 * Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML)
 * [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface

+:warning: Development is currently focused on achieving the best possible quality of results for a wide variety of historical documents and therefore processing can be very slow. We aim to improve this, but contributions are welcome.
+
 ## Installation
-Python versions `3.8-3.11` with Tensorflow versions >=`2.12` are currently supported. 
+Python `3.8-3.11` with Tensorflow `2.12-2.15` on Linux are currently supported.

 For (limited) GPU support the CUDA toolkit needs to be installed.

-You can either install via 
+You can either install from PyPI

 ```
 pip install eynollah
@ -39,25 +41,28 @@ cd eynollah; pip install -e .
 Alternatively, you can run `make install` or `make install-dev` for editable installation.

 ## Models
-Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/).
+Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/) or [huggingface](https://huggingface.co/SBB?search_models=eynollah). 

-In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation). 
+## Train
+🚧 **Work in progress**  
+  
+In case you want to train your own model, have a look at [`sbb_pixelwise_segmentation`](https://github.com/qurator-spk/sbb_pixelwise_segmentation). 

 ## Usage
 The command-line interface can be called like this:

 ```sh
 eynollah \
-  -i <image file> \
+  -i <single image file> | -di <directory containing image files> \
  -o <output directory> \
-  -m <path to directory containing model files> \
+  -m <directory containing model files> \
     [OPTIONS]
 ```

 The following options can be used to further configure the processing:

 | option            | description                                                                    |
-|----------|:-------------|
+|-------------------|:-------------------------------------------------------------------------------|
 | `-fl`             | full layout analysis including all steps and segmentation classes              |
 | `-light`          | lighter and faster but simpler method for main region detection and deskewing  |
 | `-tab`            | apply table detection                                                          |
@ -66,18 +71,19 @@ The following options can be used to further configure the processing:
 | `-cl`             | apply contour detection for curved text lines instead of bounding boxes        |
 | `-ib`             | apply binarization (the resulting image is saved to the output directory)      |
 | `-ep`             | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) |
+| `-eoi`            | extract only images to output directory (other processing will not be done)    |
 | `-ho`             | ignore headers for reading order dectection                                    |
-| `-di <directory>`  | process all images in a directory in batch mode |
 | `-si <directory>` | save image regions detected to this directory                                  |
 | `-sd <directory>` | save deskewed image to this directory                                          |
 | `-sl <directory>` | save layout prediction as plot to this directory                               |
 | `-sp <directory>` | save cropped page image to this directory                                      |
 | `-sa <directory>` | save all (plot, enhanced/binary image, layout) to this directory               |

-If no option is set, the tool will perform layout detection of main regions (background, text, images, separators and marginals).
-The tool produces better quality output when RGB images are used as input than greyscale or binarized images.
+If no option is set, the tool performs layout detection of main regions (background, text, images, separators and marginals).
+The best output quality is produced when RGB images are used as input rather than greyscale or binarized images.

 #### Use as OCR-D processor
+🚧 **Work in progress** 

 Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor. 

@ -95,11 +101,14 @@ ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
    
 uses the original (RGB) image despite any binarization that may have occured in previous OCR-D processing steps

+#### Additional documentation
+Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki).
+
 ## How to cite
 If you find this tool useful in your work, please consider citing our paper:

 ```bibtex
-@inproceedings{rezanezhad2023eynollah,
+@inproceedings{hip23rezanezhad,
  title     = {Document Layout Analysis with Deep Learning and Heuristics},
  author    = {Rezanezhad, Vahid and Baierer, Konstantin and Gerber, Mike and Labusch, Kai and Neudecker, Clemens},
  booktitle = {Proceedings of the 7th International Workshop on Historical Document Imaging and Processing {HIP} 2023,
--- a/ocrd-tool.json
+++ b/ocrd-tool.json
@ -1 +1 @@
-qurator/eynollah/ocrd-tool.json
+src/eynollah/ocrd-tool.json
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,34 +1,43 @@
 [build-system]
-requires = ["setuptools>=61.0"]
-build-backend = "setuptools.build_meta"
+requires = ["setuptools>=61.0", "wheel", "setuptools-ocrd"]

 [project]
 name = "eynollah"
-version = "0.1.0"
+authors = [
+    {name = "Vahid Rezanezhad"},
+    {name = "Staatsbibliothek zu Berlin - Preußischer Kulturbesitz"},
+]
+description = "Document Layout Analysis"
+readme = "README.md"
+license.file = "LICENSE"
+requires-python = ">=3.8"
+keywords = ["document layout analysis", "image segmentation"]

+dynamic = ["dependencies", "version"]

-
-
-dependencies = [
-    "ocrd >= 2.23.3",
-    "tensorflow == 2.12.1",
-    "scikit-learn >= 0.23.2",
-    "imutils >= 0.5.3",
-    "numpy < 1.24.0",
-    "matplotlib",
-    "torch == 2.0.1",
-    "transformers == 4.30.2",
-    "numba == 0.58.1",
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Topic :: Scientific/Engineering :: Image Processing",
 ]

 [project.scripts]
-eynollah = "qurator.eynollah.cli:main"
-ocrd-eynollah-segment="qurator.eynollah.ocrd_cli:main"
+eynollah = "eynollah.cli:main"
+ocrd-eynollah-segment = "eynollah.ocrd_cli:main"

+[project.urls]
+Homepage = "https://github.com/qurator-spk/eynollah"
+Repository = "https://github.com/qurator-spk/eynollah.git"
+
+[tool.setuptools.dynamic]
+dependencies = {file = ["requirements.txt"]}

 [tool.setuptools.packages.find]
-where = ["."]
-include = ["qurator"]
+where = ["src"]

 [tool.setuptools.package-data]
 "*" = ["*.json", '*.yml', '*.xml', '*.xsd']
--- a/qurator/init.py
+++ b/qurator/init.py
@ -1 +0,0 @@
-__import__("pkg_resources").declare_namespace(__name__)
--- a/qurator/eynollah/init.py
+++ b/qurator/eynollah/init.py
@ -1 +0,0 @@
-
--- a/src/eynollah/init.py
+++ b/src/eynollah/init.py
--- a/qurator/eynollah/cli.py
+++ b/qurator/eynollah/cli.py
@ -1,8 +1,9 @@
 import sys
 import click
 from ocrd_utils import initLogging, setOverrideLogLevel
-from qurator.eynollah.eynollah import Eynollah
-from qurator.eynollah.sbb_binarize import SbbBinarizer
+from eynollah.eynollah import Eynollah
+from eynollah.eynollah import Eynollah
+from eynollah.sbb_binarize import SbbBinarizer

@click.group()
 def main():
@ -146,6 +147,12 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
    is_flag=True,
    help="If set, will plot intermediary files and images",
 )
+@click.option(
+    "--extract_only_images/--disable-extracting_only_images",
+    "-eoi/-noeoi",
+    is_flag=True,
+    help="If a directory is given, only images in documents will be cropped and saved there and the other processing will not be done",
+)
@click.option(
    "--allow-enhancement/--no-allow-enhancement",
    "-ae/-noae",
@ -247,7 +254,7 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
    help="Override log level globally to this",
 )

-def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level):
+def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level):
    if log_level:
        setOverrideLogLevel(log_level)
    initLogging()
@ -260,6 +267,9 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s
    if textline_light and not light_version:
        print('Error: You used -tll to enable light textline detection but -light is not enabled')
        sys.exit(1)
+
+    if extract_only_images and  (allow_enhancement or allow_scaling or light_version or curved_line or textline_light or full_layout or tables or right2left or headers_off) :
+        print('Error: You used -eoi which can not be enabled alongside light_version -light or allow_scaling -as or allow_enhancement -ae or curved_line -cl or textline_light -tll or full_layout -fl or tables -tab or right2left -r2l or headers_off -ho')
    if light_version and not textline_light:
        print('Error: You used -light without -tll. Light version need light textline to be enabled.')
        sys.exit(1)
@ -269,6 +279,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s
        dir_in=dir_in,
        dir_models=model,
        dir_of_cropped_images=save_images,
+        extract_only_images=extract_only_images,
        dir_of_layout=save_layout,
        dir_of_deskewed=save_deskewed,
        dir_of_all=save_all,
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@ -38,7 +38,8 @@ sys.stderr = stderr
 tf.get_logger().setLevel("ERROR")
 warnings.filterwarnings("ignore")
 import matplotlib.pyplot as plt
-from tensorflow.python.keras.backend import set_session
+# use tf1 compatibility for keras backend
+from tensorflow.compat.v1.keras.backend import set_session
 from tensorflow.keras import layers

 from .utils.contour import (
@ -158,6 +159,7 @@ class Eynollah:
        dir_out=None,
        dir_in=None,
        dir_of_cropped_images=None,
+        extract_only_images=False,
        dir_of_layout=None,
        dir_of_deskewed=None,
        dir_of_all=None,
@ -211,6 +213,8 @@ class Eynollah:
        self.input_binary = input_binary
        self.allow_scaling = allow_scaling
        self.headers_off = headers_off
+        self.light_version = light_version
+        self.extract_only_images = extract_only_images
        self.ignore_page_extraction = ignore_page_extraction
        self.skip_layout_and_reading_order = skip_layout_and_reading_order
        self.ocr = do_ocr
@ -250,6 +254,7 @@ class Eynollah:
        self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425"
        self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
        self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
+        self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18"
        self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
        ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
@ -288,7 +293,23 @@ class Eynollah:
            
            self.ls_imgs  = os.listdir(self.dir_in)
            
-        if dir_in and not light_version:
+        if dir_in and self.extract_only_images:
+            config = tf.compat.v1.ConfigProto()
+            config.gpu_options.allow_growth = True
+            session = tf.compat.v1.Session(config=config)
+            set_session(session)
+
+            self.model_page = self.our_load_model(self.model_page_dir)
+            self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
+            self.model_bin = self.our_load_model(self.model_dir_of_binarization)
+            #self.model_textline = self.our_load_model(self.model_textline_dir)
+            self.model_region = self.our_load_model(self.model_region_dir_p_ens_light_only_images_extraction)
+            #self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np)
+            #self.model_region_fl = self.our_load_model(self.model_region_dir_fully)
+
+            self.ls_imgs  = os.listdir(self.dir_in)
+
+        if dir_in and not (light_version or self.extract_only_images):
            config = tf.compat.v1.ConfigProto()
            config.gpu_options.allow_growth = True
            session = tf.compat.v1.Session(config=config)
@ -534,6 +555,27 @@ class Eynollah:

        return img_new, num_column_is_classified

+    def calculate_width_height_by_columns_extract_only_images(self, img, num_col, width_early, label_p_pred):
+        self.logger.debug("enter calculate_width_height_by_columns")
+        if num_col == 1:
+            img_w_new = 700
+        elif num_col == 2:
+            img_w_new = 900
+        elif num_col == 3:
+            img_w_new = 1500
+        elif num_col == 4:
+            img_w_new = 1800
+        elif num_col == 5:
+            img_w_new = 2200
+        elif num_col == 6:
+            img_w_new = 2500
+        img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
+
+        img_new = resize_image(img, img_h_new, img_w_new)
+        num_column_is_classified = True
+
+        return img_new, num_column_is_classified
+
    def resize_image_with_column_classifier(self, is_image_enhanced, img_bin):
        self.logger.debug("enter resize_image_with_column_classifier")
        if self.input_binary:
@ -690,6 +732,7 @@ class Eynollah:
        
        self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5))

+        if not self.extract_only_images:
            if dpi < DPI_THRESHOLD:
                if light_version and num_col in (1,2):
                    img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred)
@ -709,6 +752,10 @@ class Eynollah:
                    num_column_is_classified = True
                    image_res = np.copy(img)
                    is_image_enhanced = False
+        else:
+            num_column_is_classified = True
+            image_res = np.copy(img)
+            is_image_enhanced = False

        self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
        return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
@ -1191,132 +1238,21 @@ class Eynollah:
                    
                    batch_indexer = batch_indexer + 1

-                    #img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
-                    #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]),
-                                                 #verbose=0)
-                    #seg = np.argmax(label_p_pred, axis=3)[0]
-                    
-                    
-                    ######seg_not_base = label_p_pred[0,:,:,4]
-                    ########seg2 = -label_p_pred[0,:,:,2]
-                    
-                    
-                    ######seg_not_base[seg_not_base>0.03] =1
-                    ######seg_not_base[seg_not_base<1] =0
-                    
-                    
-                    
-                    ######seg_test = label_p_pred[0,:,:,1]
-                    ########seg2 = -label_p_pred[0,:,:,2]
-                    
-                    
-                    ######seg_test[seg_test>0.75] =1
-                    ######seg_test[seg_test<1] =0
-                    
-                    
-                    ######seg_line = label_p_pred[0,:,:,3]
-                    ########seg2 = -label_p_pred[0,:,:,2]
-                    
-                    
-                    ######seg_line[seg_line>0.1] =1
-                    ######seg_line[seg_line<1] =0
-                    
-                    
-                    ######seg_background = label_p_pred[0,:,:,0]
-                    ########seg2 = -label_p_pred[0,:,:,2]
-                    
-                    
-                    ######seg_background[seg_background>0.25] =1
-                    ######seg_background[seg_background<1] =0
-                    ##seg = seg+seg2
-                    #seg = label_p_pred[0,:,:,2]
-                    #seg[seg>0.4] =1
-                    #seg[seg<1] =0
-                    
-                    ##plt.imshow(seg_test)
-                    ##plt.show()
-                    
-                    ##plt.imshow(seg_background)
-                    ##plt.show()
-                    #seg[seg==1]=0
-                    #seg[seg_test==1]=1
-                    ######seg[seg_not_base==1]=4
-                    ######seg[seg_background==1]=0
-                    ######seg[(seg_line==1) & (seg==0)]=3
-                    #seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
-
-                    #if i == 0 and j == 0:
-                        #seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :]
-                        #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin]
-                        #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg
-                        #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color
-                    #elif i == nxf - 1 and j == nyf - 1:
-                        #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :]
-                        #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0]
-                        #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg
-                        #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color
-                    #elif i == 0 and j == nyf - 1:
-                        #seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :]
-                        #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin]
-                        #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg
-                        #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color
-                    #elif i == nxf - 1 and j == 0:
-                        #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :]
-                        #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0]
-                        #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg
-                        #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color
-                    #elif i == 0 and j != 0 and j != nyf - 1:
-                        #seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :]
-                        #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin]
-                        #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg
-                        #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color
-                    #elif i == nxf - 1 and j != 0 and j != nyf - 1:
-                        #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :]
-                        #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0]
-                        #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg
-                        #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color
-                    #elif i != 0 and i != nxf - 1 and j == 0:
-                        #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :]
-                        #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin]
-                        #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg
-                        #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color
-                    #elif i != 0 and i != nxf - 1 and j == nyf - 1:
-                        #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :]
-                        #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin]
-                        #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg
-                        #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color
-                    #else:
-                        #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :]
-                        #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin]
-                        #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg
-                        #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color
-                        
-                        
                    if batch_indexer == n_batch_inference:
                        label_p_pred = model.predict(img_patch,verbose=0)
                        
                        seg = np.argmax(label_p_pred, axis=3)
                        
                        if thresholding_for_some_classes_in_light_version:
-                            
                            seg_art = label_p_pred[:,:,:,4]
                            seg_art[seg_art<0.2] =0
                            seg_art[seg_art>0] =1
-                            ###seg[seg_art==1]=4
-                            ##seg_not_base = label_p_pred[:,:,:,4]
-                            ##seg_not_base[seg_not_base>0.03] =1
-                            ##seg_not_base[seg_not_base<1] =0
                            
                            seg_line = label_p_pred[:,:,:,3]
                            seg_line[seg_line>0.1] =1
                            seg_line[seg_line<1] =0
                            
-                            ##seg_background = label_p_pred[:,:,:,0]
-                            ##seg_background[seg_background>0.25] =1
-                            ##seg_background[seg_background<1] =0
-                            
                            seg[seg_art==1]=4
-                            ##seg[seg_background==1]=0
                            seg[(seg_line==1) & (seg==0)]=3
                        if thresholding_for_artificial_class_in_light_version:
                            seg_art = label_p_pred[:,:,:,2]
@ -1384,20 +1320,15 @@ class Eynollah:
                        
                        seg = np.argmax(label_p_pred, axis=3)
                        if thresholding_for_some_classes_in_light_version:
-                            seg_not_base = label_p_pred[:,:,:,4]
-                            seg_not_base[seg_not_base>0.03] =1
-                            seg_not_base[seg_not_base<1] =0
+                            seg_art = label_p_pred[:,:,:,4]
+                            seg_art[seg_art<0.2] =0
+                            seg_art[seg_art>0] =1
                            
                            seg_line = label_p_pred[:,:,:,3]
                            seg_line[seg_line>0.1] =1
                            seg_line[seg_line<1] =0
                            
-                            seg_background = label_p_pred[:,:,:,0]
-                            seg_background[seg_background>0.25] =1
-                            seg_background[seg_background<1] =0
-                            
-                            seg[seg_not_base==1]=4
-                            seg[seg_background==1]=0
+                            seg[seg_art==1]=4
                            seg[(seg_line==1) & (seg==0)]=3
                            
                        if thresholding_for_artificial_class_in_light_version:
@ -1449,7 +1380,6 @@ class Eynollah:
                                
                            indexer_inside_batch = indexer_inside_batch +1
                        
-                        
                        list_i_s = []
                        list_j_s = []
                        list_x_u = []
@ -1458,7 +1388,6 @@ class Eynollah:
                        list_y_d = []
                        
                        batch_indexer = 0
-                        
                        img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))

            prediction_true = prediction_true.astype(np.uint8)
@ -2224,6 +2153,125 @@ class Eynollah:
        q.put(slopes_sub)
        poly.put(poly_sub)
        box_sub.put(boxes_sub_new)
+
+    def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier):
+        self.logger.debug("enter get_regions_extract_images_only")
+        erosion_hurts = False
+        img_org = np.copy(img)
+        img_height_h = img_org.shape[0]
+        img_width_h = img_org.shape[1]
+
+        if num_col_classifier == 1:
+            img_w_new = 700
+        elif num_col_classifier == 2:
+            img_w_new = 900
+        elif num_col_classifier == 3:
+            img_w_new = 1500
+        elif num_col_classifier == 4:
+            img_w_new = 1800
+        elif num_col_classifier == 5:
+            img_w_new = 2200
+        elif num_col_classifier == 6:
+            img_w_new = 2500
+        img_h_new = int(img.shape[0] / float(img.shape[1]) * img_w_new)
+
+        img_resized = resize_image(img,img_h_new, img_w_new )
+
+
+
+        if not self.dir_in:
+            model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light_only_images_extraction)
+            prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region)
+        else:
+            prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region)
+
+        #plt.imshow(prediction_regions_org[:,:,0])
+        #plt.show()
+
+        prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
+
+        image_page, page_coord, cont_page = self.extract_page()
+
+
+        prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
+
+
+        prediction_regions_org=prediction_regions_org[:,:,0]
+
+        mask_lines_only = (prediction_regions_org[:,:] ==3)*1
+
+        mask_texts_only = (prediction_regions_org[:,:] ==1)*1
+
+        mask_images_only=(prediction_regions_org[:,:] ==2)*1
+
+        polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only)
+        polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001)
+
+
+        polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
+
+        polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001)
+
+        text_regions_p_true = np.zeros(prediction_regions_org.shape)
+
+        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3))
+
+        text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
+
+        text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
+
+
+
+        text_regions_p_true[text_regions_p_true.shape[0]-15:text_regions_p_true.shape[0], :] = 0
+        text_regions_p_true[:, text_regions_p_true.shape[1]-15:text_regions_p_true.shape[1]] = 0
+
+        ##polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.0001)
+        polygons_of_images = return_contours_of_interested_region(text_regions_p_true, 2, 0.001)
+
+        image_boundary_of_doc = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1]))
+
+        ###image_boundary_of_doc[:6, :] = 1
+        ###image_boundary_of_doc[text_regions_p_true.shape[0]-6:text_regions_p_true.shape[0], :] = 1
+
+        ###image_boundary_of_doc[:, :6] = 1
+        ###image_boundary_of_doc[:, text_regions_p_true.shape[1]-6:text_regions_p_true.shape[1]] = 1
+
+        #plt.imshow(image_boundary_of_doc)
+        #plt.show()
+
+        polygons_of_images_fin = []
+        for ploy_img_ind in polygons_of_images:
+            """
+            test_poly_image = np.zeros((text_regions_p_true.shape[0], text_regions_p_true.shape[1]))
+            test_poly_image = cv2.fillPoly(test_poly_image, pts = [ploy_img_ind], color=(1,1,1))
+            
+            test_poly_image = test_poly_image[:,:] + image_boundary_of_doc[:,:]
+            test_poly_image_intersected_area = ( test_poly_image[:,:]==2 )*1
+            
+            test_poly_image_intersected_area = test_poly_image_intersected_area.sum()
+            
+            if test_poly_image_intersected_area==0:
+                ##polygons_of_images_fin.append(ploy_img_ind)
+                
+                x, y, w, h = cv2.boundingRect(ploy_img_ind)
+                box = [x, y, w, h]
+                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
+                #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
+                
+                polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
+            """
+            x, y, w, h = cv2.boundingRect(ploy_img_ind)
+            if h < 150 or w < 150:
+                pass
+            else:
+                box = [x, y, w, h]
+                _, page_coord_img = crop_image_inside_box(box, text_regions_p_true)
+                #cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
+
+                polygons_of_images_fin.append(np.array([[page_coord_img[2], page_coord_img[0]], [page_coord_img[3], page_coord_img[0]], [page_coord_img[3], page_coord_img[1]], [page_coord_img[2], page_coord_img[1]]]) )
+
+        return text_regions_p_true, erosion_hurts, polygons_lines_xml, polygons_of_images_fin, image_page, page_coord, cont_page
+
    def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False):
        self.logger.debug("enter get_regions_light_v")
        t_in = time.time()
@ -3179,8 +3227,8 @@ class Eynollah:
        prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20)
        prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20)
        return prediction_table_erode.astype(np.int16)
-    def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light):

+    def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light):
        #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics')
        #print(erosion_hurts, 'erosion_hurts')
        t_in_gr = time.time()
@ -3668,7 +3716,6 @@ class Eynollah:
        img_poly[text_regions_p[:,:]==3] = 4
        img_poly[text_regions_p[:,:]==6] = 5
            
-
        model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir)

        height1 =672#448
@ -3684,7 +3731,6 @@ class Eynollah:
        
        if contours_only_text_parent_h:
            _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h)
-            
            for j in range(len(cy_main)):
                img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 
            
@ -4806,6 +4852,23 @@ class Eynollah:
                self.reset_file_name_dir(os.path.join(self.dir_in,img_name))
                #print("text region early -11 in %.1fs", time.time() - t0)
                
+                
+            if self.extract_only_images:
+                img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
+                self.logger.info("Enhancing took %.1fs ", time.time() - t0)
+
+                text_regions_p_1 ,erosion_hurts, polygons_lines_xml,polygons_of_images,image_page, page_coord, cont_page = self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
+                ocr_all_textlines = None
+                pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], polygons_of_images, [], [], [], [], [], cont_page, [], [], ocr_all_textlines)
+
+                if self.plotter:
+                    self.plotter.write_images_into_directory(polygons_of_images, image_page)
+
+                if self.dir_in:
+                    self.writer.write_pagexml(pcgts)
+                else:
+                    return pcgts
+            else:
                img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
                self.logger.info("Enhancing took %.1fs ", time.time() - t0)
                #print("text region early -1 in %.1fs", time.time() - t0)
--- a/qurator/eynollah/ocrd-tool.json
+++ b/qurator/eynollah/ocrd-tool.json
@ -1,5 +1,5 @@
 {
-  "version": "0.3.0",
+  "version": "0.3.1",
  "git_url": "https://github.com/qurator-spk/eynollah",
  "tools": {
    "ocrd-eynollah-segment": {
@ -52,10 +52,10 @@
      },
      "resources": [
 	{
-	  "description": "models for eynollah (TensorFlow format)",
-	  "url": "https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz",
+	  "description": "models for eynollah (TensorFlow SavedModel format)",
+	  "url": "https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz",
 	  "name": "default",
-	  "size": 1761991295,
+	  "size": 1894627041,
 	  "type": "archive",
 	  "path_in_archive": "models_eynollah"
 	}
--- a/qurator/eynollah/ocrd_cli.py
+++ b/qurator/eynollah/ocrd_cli.py
--- a/qurator/eynollah/plot.py
+++ b/qurator/eynollah/plot.py
--- a/qurator/eynollah/processor.py
+++ b/qurator/eynollah/processor.py
@ -42,7 +42,7 @@ class EynollahProcessor(Processor):
            page = pcgts.get_Page()
            # XXX loses DPI information
            # page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized')
-            image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename
+            image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(local_filename=page.imageFilename))).local_filename
            eynollah_kwargs = {
                'dir_models': self.resolve_resource(self.parameter['models']),
                'allow_enhancement': False,
--- a/qurator/eynollah/sbb_binarize.py
+++ b/qurator/eynollah/sbb_binarize.py
--- a/qurator/eynollah/utils/init.py
+++ b/qurator/eynollah/utils/init.py
--- a/qurator/eynollah/utils/contour.py
+++ b/qurator/eynollah/utils/contour.py
--- a/qurator/eynollah/utils/counter.py
+++ b/qurator/eynollah/utils/counter.py
--- a/qurator/eynollah/utils/drop_capitals.py
+++ b/qurator/eynollah/utils/drop_capitals.py
--- a/qurator/eynollah/utils/is_nan.py
+++ b/qurator/eynollah/utils/is_nan.py
--- a/qurator/eynollah/utils/marginals.py
+++ b/qurator/eynollah/utils/marginals.py
--- a/qurator/eynollah/utils/pil_cv2.py
+++ b/qurator/eynollah/utils/pil_cv2.py
--- a/qurator/eynollah/utils/resize.py
+++ b/qurator/eynollah/utils/resize.py
--- a/qurator/eynollah/utils/rotate.py
+++ b/qurator/eynollah/utils/rotate.py
--- a/qurator/eynollah/utils/separate_lines.py
+++ b/qurator/eynollah/utils/separate_lines.py
--- a/qurator/eynollah/utils/xml.py
+++ b/qurator/eynollah/utils/xml.py
--- a/qurator/eynollah/writer.py
+++ b/qurator/eynollah/writer.py
@ -202,10 +202,18 @@ class EynollahXmlWriter():
            page.add_ImageRegion(img_region)
            points_co = ''
            for lmm in range(len(found_polygons_text_region_img[mm])):
+                try:
                    points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
                    points_co += ','
                    points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
                    points_co += ' '
+                except:
+
+                    points_co +=  str(int((found_polygons_text_region_img[mm][lmm][0] + page_coord[2])/ self.scale_x  ))
+                    points_co += ','
+                    points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0])/ self.scale_y  ))
+                    points_co += ' '
+                    
            img_region.get_Coords().set_points(points_co[:-1])
            
        for mm in range(len(polygons_lines_to_be_written_in_xml)):
--- a/tests/test_counter.py
+++ b/tests/test_counter.py
@ -1,5 +1,5 @@
 from tests.base import main
-from qurator.eynollah.utils.counter import EynollahIdCounter
+from eynollah.utils.counter import EynollahIdCounter

 def test_counter_string():
    c = EynollahIdCounter()
--- a/tests/test_dpi.py
+++ b/tests/test_dpi.py
@ -1,6 +1,6 @@
 import cv2
 from pathlib import Path
-from qurator.eynollah.utils.pil_cv2 import check_dpi
+from eynollah.utils.pil_cv2 import check_dpi
 from tests.base import main

 def test_dpi():
--- a/tests/test_run.py
+++ b/tests/test_run.py
@ -2,7 +2,7 @@ from os import environ
 from pathlib import Path
 from ocrd_utils import pushd_popd
 from tests.base import CapturingTestCase as TestCase, main
-from qurator.eynollah.cli import main as eynollah_cli
+from eynollah.cli import main as eynollah_cli

 testdir = Path(__file__).parent.resolve()

--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@ -1,7 +1,7 @@
 def test_utils_import():
-    import qurator.eynollah.utils
-    import qurator.eynollah.utils.contour
-    import qurator.eynollah.utils.drop_capitals
-    import qurator.eynollah.utils.drop_capitals
-    import qurator.eynollah.utils.is_nan
-    import qurator.eynollah.utils.rotate
+    import eynollah.utils
+    import eynollah.utils.contour
+    import eynollah.utils.drop_capitals
+    import eynollah.utils.drop_capitals
+    import eynollah.utils.is_nan
+    import eynollah.utils.rotate
--- a/tests/test_xml.py
+++ b/tests/test_xml.py
@ -1,5 +1,5 @@
 from pytest import main
-from qurator.eynollah.utils.xml import create_page_xml
+from eynollah.utils.xml import create_page_xml
 from ocrd_models.ocrd_page import to_xml

 PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'
				`@ -1 +0,0 @@`
				`__import__("pkg_resources").declare_namespace(__name__)`