Merge pull request #19 from qurator-spk/refactor-cntd

Refactor cntd
vahidrezanezhad committed 4 years ago via GitHub
commit 932c3fb479

@ -31,5 +31,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install .
pip install -r requirements-test.txt
- name: Test with pytest
run: echo success # make test
run: make test

@ -1,3 +1,6 @@
EYNOLLAH_MODELS ?= $(PWD)/models_eynollah
export EYNOLLAH_MODELS
# BEGIN-EVAL makefile-parser --make-help Makefile
help:

@ -88,6 +88,21 @@ eynollah \
The tool also accepts binarized images, but works better on original images (RGB format).
### `--full-layout` vs `--no-full-layout`
Here are the differences in detected elements depending on the `--full-layout`/`--no-full-layout` command line flags:
| | `--full-layout` | `--no-full-layout` |
| --- | --- | --- |
| reading order | x | x |
| header regions | x | - |
| text regions | x | x |
| text regions / text line | x | x |
| drop-capitals | x | - |
| marginals | x | x |
| marginals / text line | x | x |
| image region | x | x |
### How to use
The tool makes use of up to 9 trained models, responsible for different operations such as size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection. Not all 9 models are required for every document: depending on the document's characteristics and the parameters specified, different subsets of models are applied.

@ -0,0 +1 @@
__import__("pkg_resources").declare_namespace(__name__)

@ -1,16 +1,23 @@
import sys
import click
from sbb_newspapers_org_image.eynollah import eynollah
from ocrd_utils import initLogging, setOverrideLogLevel
from qurator.eynollah.eynollah import Eynollah
@click.command()
@click.option(
"--image", "-i", help="image filename", type=click.Path(exists=True, dir_okay=False)
"--image",
"-i",
help="image filename",
type=click.Path(exists=True, dir_okay=False),
required=True,
)
@click.option(
"--out",
"-o",
help="directory to write output xml data",
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option(
"--model",
@ -43,35 +50,47 @@ from sbb_newspapers_org_image.eynollah import eynollah
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--allow_enhancement",
"-ae",
"--enable-plotting/--disable-plotting",
"-ep/-noep",
is_flag=True,
help="If set, will plot intermediary files and images",
)
@click.option(
"--allow-enhancement/--no-allow-enhancement",
"-ae/-noae",
is_flag=True,
help="if this parameter set to true, this tool would check that input image need resizing and enhancement or not. If so output of resized and enhanced image and corresponding layout data will be written in out directory",
)
@click.option(
"--curved_line",
"-cl",
"--curved-line/--no-curvedline",
"-cl/-nocl",
is_flag=True,
help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectabgle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.",
)
@click.option(
"--full_layout",
"-fl",
"--full-layout/--no-full-layout",
"-fl/-nofl",
is_flag=True,
help="if this parameter set to true, this tool will try to return all elements of layout.",
)
@click.option(
"--allow_scaling",
"-as",
"--allow_scaling/--no-allow-scaling",
"-as/-noas",
is_flag=True,
help="if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection",
)
@click.option(
"--headers_off",
"-ho",
"--headers-off/--headers-on",
"-ho/-noho",
is_flag=True,
help="if this parameter set to true, this tool would ignore headers role in reading order",
)
@click.option(
"--log-level",
"-l",
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this",
)
def main(
image,
out,
@ -80,13 +99,24 @@ def main(
save_layout,
save_deskewed,
save_all,
enable_plotting,
allow_enhancement,
curved_line,
full_layout,
allow_scaling,
headers_off,
log_level
):
eynollah(
if log_level:
setOverrideLogLevel(log_level)
initLogging()
if not enable_plotting and (save_layout or save_deskewed or save_all or save_images):
print("Error: You used one of -sl, -sd, -sa or -si but did not enable plotting with -ep")
sys.exit(1)
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_images):
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa or -si")
sys.exit(1)
eynollah = Eynollah(
image,
None,
out,
@ -95,13 +125,15 @@ def main(
save_layout,
save_deskewed,
save_all,
enable_plotting,
allow_enhancement,
curved_line,
full_layout,
allow_scaling,
headers_off,
).run()
)
pcgts = eynollah.run()
eynollah.writer.write_pagexml(pcgts)
if __name__ == "__main__":
main()
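The options above use Click's paired on/off flag syntax (`--flag/--no-flag`, `-f/-nof`), which binds both spellings to a single boolean parameter. A minimal standalone sketch of the pattern, with a hypothetical command name:

import click

@click.command()
@click.option("--full-layout/--no-full-layout", "-fl/-nofl", is_flag=True,
              help="detect all layout elements, or only the main ones")
def demo(full_layout):
    # Click maps --full-layout/-fl to True and --no-full-layout/-nofl to False,
    # both landing in the single parameter "full_layout".
    click.echo("full layout: %s" % full_layout)

if __name__ == "__main__":
    demo()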

File diff suppressed because it is too large

@ -0,0 +1,169 @@
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import os.path
import cv2
from scipy.ndimage import gaussian_filter1d
from .utils import crop_image_inside_box
from .utils.rotate import rotyate_image_different
from .utils.resize import resize_image
class EynollahPlotter():
"""
Class collecting all the plotting and image writing methods
"""
def __init__(
self,
*,
dir_of_all,
dir_of_deskewed,
dir_of_layout,
dir_of_cropped_images,
image_filename,
image_filename_stem,
image_org=None,
scale_x=1,
scale_y=1,
):
self.dir_of_all = dir_of_all
self.dir_of_layout = dir_of_layout
self.dir_of_cropped_images = dir_of_cropped_images
self.dir_of_deskewed = dir_of_deskewed
self.image_filename = image_filename
self.image_filename_stem = image_filename_stem
# XXX TODO hacky these cannot be set at init time
self.image_org = image_org
self.scale_x = scale_x
self.scale_y = scale_y
def save_plot_of_layout_main(self, text_regions_p, image_page):
if self.dir_of_layout is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia']
values_indexes = [0, 1, 2, 3, 4]
plt.figure(figsize=(40, 40))
plt.rcParams["font.size"] = "40"
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout_main.png"))
def save_plot_of_layout_main_all(self, text_regions_p, image_page):
if self.dir_of_all is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia']
values_indexes = [0, 1, 2, 3, 4]
plt.figure(figsize=(80, 40))
plt.rcParams["font.size"] = "40"
plt.subplot(1, 2, 1)
plt.imshow(image_page)
plt.subplot(1, 2, 2)
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_main_and_page.png"))
def save_plot_of_layout(self, text_regions_p, image_page):
if self.dir_of_layout is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
values_indexes = [0, 1, 2, 8, 4, 5, 6]
plt.figure(figsize=(40, 40))
plt.rcParams["font.size"] = "40"
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout.png"))
def save_plot_of_layout_all(self, text_regions_p, image_page):
if self.dir_of_all is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
values_indexes = [0, 1, 2, 8, 4, 5, 6]
plt.figure(figsize=(80, 40))
plt.rcParams["font.size"] = "40"
plt.subplot(1, 2, 1)
plt.imshow(image_page)
plt.subplot(1, 2, 2)
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_and_page.png"))
def save_plot_of_textlines(self, textline_mask_tot_ea, image_page):
if self.dir_of_all is not None:
values = np.unique(textline_mask_tot_ea[:, :])
pixels = ["Background", "Textlines"]
values_indexes = [0, 1]
plt.figure(figsize=(80, 40))
plt.rcParams["font.size"] = "40"
plt.subplot(1, 2, 1)
plt.imshow(image_page)
plt.subplot(1, 2, 2)
im = plt.imshow(textline_mask_tot_ea[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_textline_and_page.png"))
def save_deskewed_image(self, slope_deskew):
if self.dir_of_all is not None:
cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_org.png"), self.image_org)
if self.dir_of_deskewed is not None:
img_rotated = rotyate_image_different(self.image_org, slope_deskew)
cv2.imwrite(os.path.join(self.dir_of_deskewed, self.image_filename_stem + "_deskewed.png"), img_rotated)
def save_page_image(self, image_page):
if self.dir_of_all is not None:
cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page)
def save_plot_of_textline_density(self, img_patch_org):
if self.dir_of_all is not None:
plt.figure(figsize=(80,40))
plt.rcParams['font.size']='50'
plt.subplot(1,2,1)
plt.imshow(img_patch_org)
plt.subplot(1,2,2)
plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8)
plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60)
plt.ylabel('Height',fontsize=60)
plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
plt.gca().invert_yaxis()
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_density_of_textline.png'))
def save_plot_of_rotation_angle(self, angels, var_res):
if self.dir_of_all is not None:
plt.figure(figsize=(60,30))
plt.rcParams['font.size']='50'
plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4)
plt.xlabel('angle',fontsize=50)
plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50)
plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$')
plt.legend(loc='best')
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_rotation_angle.png'))
def write_images_into_directory(self, img_contoures, image_page):
if self.dir_of_cropped_images is not None:
index = 0
for cont_ind in img_contoures:
x, y, w, h = cv2.boundingRect(cont_ind)
box = [x, y, w, h]
croped_page, page_coord = crop_image_inside_box(box, image_page)
croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x))
path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg")
cv2.imwrite(path, croped_page)
index += 1
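Every method of the plotter is a no-op unless its target directory was provided, so callers can enable outputs selectively. A hypothetical instantiation (paths are placeholders; `text_regions_p` and `image_page` would come from the segmentation step):

plotter = EynollahPlotter(
    dir_of_all="out/all",
    dir_of_deskewed=None,          # deskewed-image output disabled
    dir_of_layout="out/layout",
    dir_of_cropped_images=None,    # cropped-image output disabled
    image_filename="pages/p0001.tif",
    image_filename_stem="p0001",
)
plotter.save_plot_of_layout_main(text_regions_p, image_page)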

@ -299,24 +299,6 @@ def crop_image_inside_box(box, img_org_copy):
image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
def otsu_copy(img):
img_r = np.zeros(img.shape)
img1 = img[:, :, 0]
img2 = img[:, :, 1]
img3 = img[:, :, 2]
# print(img.min())
# print(img[:,:,0].min())
# blur = cv2.GaussianBlur(img,(5,5))
# ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
retval2, threshold2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
retval3, threshold3 = cv2.threshold(img3, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
img_r[:, :, 0] = threshold1
img_r[:, :, 1] = threshold1
img_r[:, :, 2] = threshold1
return img_r
def otsu_copy_binary(img):
img_r = np.zeros((img.shape[0], img.shape[1], 3))
img1 = img[:, :, 0]
@ -372,242 +354,42 @@ def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregio
def find_num_col_deskew(regions_without_seperators, sigma_, multiplier=3.8):
regions_without_seperators_0=regions_without_seperators[:,:].sum(axis=1)
##meda_n_updown=regions_without_seperators_0[len(regions_without_seperators_0)::-1]
##first_nonzero=(next((i for i, x in enumerate(regions_without_seperators_0) if x), 0))
##last_nonzero=(next((i for i, x in enumerate(meda_n_updown) if x), 0))
##last_nonzero=len(regions_without_seperators_0)-last_nonzero
y=regions_without_seperators_0#[first_nonzero:last_nonzero]
##y_help=np.zeros(len(y)+20)
##y_help[10:len(y)+10]=y
##x=np.array( range(len(y)) )
##zneg_rev=-y_help+np.max(y_help)
##zneg=np.zeros(len(zneg_rev)+20)
##zneg[10:len(zneg_rev)+10]=zneg_rev
z=gaussian_filter1d(y, sigma_)
###zneg= gaussian_filter1d(zneg, sigma_)
###peaks_neg, _ = find_peaks(zneg, height=0)
###peaks, _ = find_peaks(z, height=0)
###peaks_neg=peaks_neg-10-10
####print(np.std(z),'np.std(z)np.std(z)np.std(z)')
#####plt.plot(z)
#####plt.show()
#####plt.imshow(regions_without_seperators)
#####plt.show()
###"""
###last_nonzero=last_nonzero-0#100
###first_nonzero=first_nonzero+0#+100
###peaks_neg=peaks_neg[(peaks_neg>first_nonzero) & (peaks_neg<last_nonzero)]
###peaks=peaks[(peaks>.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])]
###"""
###interest_pos=z[peaks]
###interest_pos=interest_pos[interest_pos>10]
###interest_neg=z[peaks_neg]
###min_peaks_pos=np.mean(interest_pos)
###min_peaks_neg=0#np.min(interest_neg)
###dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier
####print(interest_pos)
###grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
###interest_neg_fin=interest_neg[(interest_neg<grenze)]
###peaks_neg_fin=peaks_neg[(interest_neg<grenze)]
###interest_neg_fin=interest_neg[(interest_neg<grenze)]
###"""
###if interest_neg[0]<0.1:
###interest_neg=interest_neg[1:]
###if interest_neg[len(interest_neg)-1]<0.1:
###interest_neg=interest_neg[:len(interest_neg)-1]
###min_peaks_pos=np.min(interest_pos)
###min_peaks_neg=0#np.min(interest_neg)
###dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier
###grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
###"""
####interest_neg_fin=interest_neg#[(interest_neg<grenze)]
####peaks_neg_fin=peaks_neg#[(interest_neg<grenze)]
####interest_neg_fin=interest_neg#[(interest_neg<grenze)]
###num_col=(len(interest_neg_fin))+1
###p_l=0
###p_u=len(y)-1
###p_m=int(len(y)/2.)
###p_g_l=int(len(y)/3.)
###p_g_u=len(y)-int(len(y)/3.)
###diff_peaks=np.abs( np.diff(peaks_neg_fin) )
###diff_peaks_annormal=diff_peaks[diff_peaks<30]
#print(len(interest_neg_fin),np.mean(interest_neg_fin))
return np.std(z)#interest_neg_fin,np.std(z)
def return_hor_spliter_by_index_for_without_verticals(peaks_neg_fin_t, x_min_hor_some, x_max_hor_some):
# print(peaks_neg_fin_t,x_min_hor_some,x_max_hor_some)
arg_min_hor_sort = np.argsort(x_min_hor_some)
x_min_hor_some_sort = np.sort(x_min_hor_some)
x_max_hor_some_sort = x_max_hor_some[arg_min_hor_sort]
arg_minmax = np.array(range(len(peaks_neg_fin_t)))
indexer_lines = []
indexes_to_delete = []
indexer_lines_deletions_len = []
indexr_uniq_ind = []
for i in range(len(x_min_hor_some_sort)):
min_h = peaks_neg_fin_t - x_min_hor_some_sort[i]
max_h = peaks_neg_fin_t - x_max_hor_some_sort[i]
min_h[0] = min_h[0] # +20
max_h[len(max_h) - 1] = max_h[len(max_h) - 1] - 20
min_h_neg = arg_minmax[(min_h < 0)]
min_h_neg_n = min_h[min_h < 0]
try:
min_h_neg = [min_h_neg[np.argmax(min_h_neg_n)]]
except:
min_h_neg = []
max_h_neg = arg_minmax[(max_h > 0)]
max_h_neg_n = max_h[max_h > 0]
if len(max_h_neg_n) > 0:
max_h_neg = [max_h_neg[np.argmin(max_h_neg_n)]]
else:
max_h_neg = []
if len(min_h_neg) > 0 and len(max_h_neg) > 0:
deletions = list(range(min_h_neg[0] + 1, max_h_neg[0]))
unique_delets_int = []
# print(deletions,len(deletions),'delii')
if len(deletions) > 0:
for j in range(len(deletions)):
indexes_to_delete.append(deletions[j])
# print(deletions,indexes_to_delete,'badiii')
unique_delets = np.unique(indexes_to_delete)
# print(min_h_neg[0],unique_delets)
unique_delets_int = unique_delets[unique_delets < min_h_neg[0]]
indexer_lines_deletions_len.append(len(deletions))
indexr_uniq_ind.append([deletions])
else:
indexer_lines_deletions_len.append(0)
indexr_uniq_ind.append(-999)
index_line_true = min_h_neg[0] - len(unique_delets_int)
# print(index_line_true)
if index_line_true > 0 and min_h_neg[0] >= 2:
index_line_true = index_line_true
else:
index_line_true = min_h_neg[0]
indexer_lines.append(index_line_true)
if len(unique_delets_int) > 0:
for dd in range(len(unique_delets_int)):
indexes_to_delete.append(unique_delets_int[dd])
else:
indexer_lines.append(-999)
indexer_lines_deletions_len.append(-999)
indexr_uniq_ind.append(-999)
regions_without_seperators_0 = regions_without_seperators[:,:].sum(axis=1)
z = gaussian_filter1d(regions_without_seperators_0, sigma_)
return np.std(z)
peaks_true = []
for m in range(len(peaks_neg_fin_t)):
if m in indexes_to_delete:
pass
else:
peaks_true.append(peaks_neg_fin_t[m])
return indexer_lines, peaks_true, arg_min_hor_sort, indexer_lines_deletions_len, indexr_uniq_ind
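After the refactor, `find_num_col_deskew` reduces to the standard deviation of the Gaussian-smoothed horizontal projection profile; `return_deskew_slop` further below maximizes this score over candidate rotation angles. A self-contained sketch of why that works:

import numpy as np
from scipy.ndimage import gaussian_filter1d, rotate

def projection_sharpness(binary_img, sigma=2.0):
    # Sum each row and smooth the 1-D profile; horizontal textlines give a
    # strongly peaked profile, so its standard deviation is largest when the
    # page is deskewed.
    return np.std(gaussian_filter1d(binary_img.sum(axis=1), sigma))

page = np.zeros((120, 200))
page[20:30, 10:190] = 1    # three synthetic horizontal "textlines"
page[55:65, 10:190] = 1
page[90:100, 10:190] = 1

for angle in (-10, -5, 0, 5, 10):
    rotated = rotate(page, angle, reshape=False, order=0)
    print(angle, round(projection_sharpness(rotated), 2))
# The score peaks at angle 0, i.e. at the deskewed orientation.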
def find_num_col(regions_without_seperators, multiplier=3.8):
regions_without_seperators_0 = regions_without_seperators[:, :].sum(axis=0)
##plt.plot(regions_without_seperators_0)
##plt.show()
sigma_ = 35 # 70#35
meda_n_updown = regions_without_seperators_0[len(regions_without_seperators_0) :: -1]
first_nonzero = next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)
last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0)
# print(last_nonzero)
# print(isNaN(last_nonzero))
# last_nonzero=0#halalikh
last_nonzero = len(regions_without_seperators_0) - last_nonzero
y = regions_without_seperators_0 # [first_nonzero:last_nonzero]
y_help = np.zeros(len(y) + 20)
y_help[10 : len(y) + 10] = y
x = np.array(range(len(y)))
zneg_rev = -y_help + np.max(y_help)
zneg = np.zeros(len(zneg_rev) + 20)
zneg[10 : len(zneg_rev) + 10] = zneg_rev
z = gaussian_filter1d(y, sigma_)
zneg = gaussian_filter1d(zneg, sigma_)
peaks_neg, _ = find_peaks(zneg, height=0)
peaks, _ = find_peaks(z, height=0)
peaks_neg = peaks_neg - 10 - 10
last_nonzero = last_nonzero - 100
first_nonzero = first_nonzero + 200
peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)]
peaks = peaks[(peaks > 0.06 * regions_without_seperators.shape[1]) & (peaks < 0.94 * regions_without_seperators.shape[1])]
peaks_neg = peaks_neg[(peaks_neg > 370) & (peaks_neg < (regions_without_seperators.shape[1] - 370))]
# print(peaks)
interest_pos = z[peaks]
interest_pos = interest_pos[interest_pos > 10]
# plt.plot(z)
# plt.show()
interest_neg = z[peaks_neg]
@ -621,9 +403,7 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
min_peaks_neg = 0 # np.min(interest_neg)
# print(np.min(interest_pos),np.max(interest_pos),np.max(interest_pos)/np.min(interest_pos),'minmax')
# $print(min_peaks_pos)
dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier
# print(interest_pos)
grenze = min_peaks_pos - dis_talaei # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
# print(interest_neg,'interest_neg')
@ -650,15 +430,11 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ((peaks_neg_fin[0] + 200) < p_m and peaks_neg_fin[1] < p_m) or ((peaks_neg_fin[0] - 200) > p_m and peaks_neg_fin[1] > p_m):
num_col = 1
peaks_neg_fin = []
else:
pass
if num_col == 2:
if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l):
num_col = 1
peaks_neg_fin = []
else:
pass
##print(len(peaks_neg_fin))
@ -673,7 +449,7 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
for i in range(len(peaks_neg_fin)):
if i == 0:
forest.append(peaks_neg_fin[i])
if i < (len(peaks_neg_fin) - 1):
if i < len(peaks_neg_fin) - 1:
if diff_peaks[i] <= cut_off:
forest.append(peaks_neg_fin[i + 1])
if diff_peaks[i] > cut_off:
@ -687,7 +463,7 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
if not isNaN(forest[np.argmin(z[forest])]):
peaks_neg_true.append(forest[np.argmin(z[forest])])
num_col = (len(peaks_neg_true)) + 1
num_col = len(peaks_neg_true) + 1
p_l = 0
p_u = len(y) - 1
p_m = int(len(y) / 2.0)
@ -706,15 +482,11 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
peaks_neg_true = [peaks_neg_true[0]]
elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter):
peaks_neg_true = [peaks_neg_true[1]]
else:
pass
if num_col == 2:
if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l):
num_col = 1
peaks_neg_true = []
else:
pass
diff_peaks_annormal = diff_peaks[diff_peaks < 360]
@ -732,9 +504,7 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
else:
peaks_neg_fin_new.append(peaks_neg_fin[ii + 1])
elif (ii - 1) in arg_help_ann:
pass
else:
elif (ii - 1) not in arg_help_ann:
peaks_neg_fin_new.append(peaks_neg_fin[ii])
else:
peaks_neg_fin_new = peaks_neg_fin
@ -948,28 +718,6 @@ def find_num_col_by_vertical_lines(regions_without_seperators, multiplier=3.8):
# print(peaks,'peaksnew')
return peaks
def delete_seperator_around(spliter_y, peaks_neg, image_by_region):
# format of subboxes box=[x1, x2 , y1, y2]
if len(image_by_region.shape) == 3:
for i in range(len(spliter_y) - 1):
for j in range(1, len(peaks_neg[i]) - 1):
image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 0][image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 0] == 6] = 0
image_by_region[spliter_y[i] : spliter_y[i + 1], peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 0][image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 1] == 6] = 0
image_by_region[spliter_y[i] : spliter_y[i + 1], peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 0][image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 2] == 6] = 0
image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 0][image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 0] == 7] = 0
image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 0][image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 1] == 7] = 0
image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 0][image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j]), 2] == 7] = 0
else:
for i in range(len(spliter_y) - 1):
for j in range(1, len(peaks_neg[i]) - 1):
image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j])][image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j])] == 6] = 0
image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j])][image_by_region[int(spliter_y[i]) : int(spliter_y[i + 1]), peaks_neg[i][j] - int(1.0 / 20.0 * peaks_neg[i][j]) : peaks_neg[i][j] + int(1.0 / 20.0 * peaks_neg[i][j])] == 7] = 0
return image_by_region
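The repeated `int(1.0 / 20.0 * peaks_neg[i][j])` above is the half-width of the erase window around each column separator: 5% of the separator's x-position, so separators further to the right get proportionally wider bands. For illustration:

peak = 800                            # x-position of a column separator
half_width = int(1.0 / 20.0 * peak)   # -> 40 px on either side
erase_window = slice(peak - half_width, peak + half_width)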
def return_regions_without_seperators(regions_pre):
kernel = np.ones((5, 5), np.uint8)
regions_without_seperators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1
@ -1432,166 +1180,6 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
return final_indexers_sorted, matrix_of_orders, final_types, final_index_type
def implent_law_head_main_not_parallel(text_regions):
# print(text_regions.shape)
text_indexes = [1, 2] # 1: main text , 2: header , 3: comments
for t_i in text_indexes:
textline_mask = text_regions[:, :] == t_i
textline_mask = textline_mask * 255.0
textline_mask = textline_mask.astype(np.uint8)
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2)
kernel = np.ones((5, 5), np.uint8)
# print(type(textline_mask),np.unique(textline_mask),textline_mask.shape)
imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
if t_i == 1:
contours_main, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(type(contours_main))
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
# print(contours_main[0],np.shape(contours_main[0]),contours_main[0][:,0,0])
elif t_i == 2:
contours_header, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(type(contours_header))
areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))])
M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))]
cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
args = np.array(range(1, len(cy_header) + 1))
args_main = np.array(range(1, len(cy_main) + 1))
for jj in range(len(contours_main)):
headers_in_main = [(cy_header > y_min_main[jj]) & ((cy_header < y_max_main[jj]))]
mains_in_main = [(cy_main > y_min_main[jj]) & ((cy_main < y_max_main[jj]))]
args_log = args * headers_in_main
res = args_log[args_log > 0]
res_true = res - 1
args_log_main = args_main * mains_in_main
res_main = args_log_main[args_log_main > 0]
res_true_main = res_main - 1
if len(res_true) > 0:
sum_header = np.sum(areas_header[res_true])
sum_main = np.sum(areas_main[res_true_main])
if sum_main > sum_header:
cnt_int = [contours_header[j] for j in res_true]
text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(1, 1, 1))
else:
cnt_int = [contours_main[j] for j in res_true_main]
text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(2, 2, 2))
for jj in range(len(contours_header)):
main_in_header = [(cy_main > y_min_header[jj]) & ((cy_main < y_max_header[jj]))]
header_in_header = [(cy_header > y_min_header[jj]) & ((cy_header < y_max_header[jj]))]
args_log = args_main * main_in_header
res = args_log[args_log > 0]
res_true = res - 1
args_log_header = args * header_in_header
res_header = args_log_header[args_log_header > 0]
res_true_header = res_header - 1
if len(res_true) > 0:
sum_header = np.sum(areas_header[res_true_header])
sum_main = np.sum(areas_main[res_true])
if sum_main > sum_header:
cnt_int = [contours_header[j] for j in res_true_header]
text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(1, 1, 1))
else:
cnt_int = [contours_main[j] for j in res_true]
text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(2, 2, 2))
return text_regions
def return_hor_spliter_by_index(peaks_neg_fin_t, x_min_hor_some, x_max_hor_some):
arg_min_hor_sort = np.argsort(x_min_hor_some)
x_min_hor_some_sort = np.sort(x_min_hor_some)
x_max_hor_some_sort = x_max_hor_some[arg_min_hor_sort]
arg_minmax = np.array(range(len(peaks_neg_fin_t)))
indexer_lines = []
indexes_to_delete = []
indexer_lines_deletions_len = []
indexr_uniq_ind = []
for i in range(len(x_min_hor_some_sort)):
min_h = peaks_neg_fin_t - x_min_hor_some_sort[i]
max_h = peaks_neg_fin_t - x_max_hor_some_sort[i]
min_h[0] = min_h[0] # +20
max_h[len(max_h) - 1] = max_h[len(max_h) - 1] ##-20
min_h_neg = arg_minmax[(min_h < 0) & (np.abs(min_h) < 360)]
max_h_neg = arg_minmax[(max_h >= 0) & (np.abs(max_h) < 360)]
if len(min_h_neg) > 0 and len(max_h_neg) > 0:
deletions = list(range(min_h_neg[0] + 1, max_h_neg[0]))
unique_delets_int = []
# print(deletions,len(deletions),'delii')
if len(deletions) > 0:
# print(deletions,len(deletions),'delii2')
for j in range(len(deletions)):
indexes_to_delete.append(deletions[j])
# print(deletions,indexes_to_delete,'badiii')
unique_delets = np.unique(indexes_to_delete)
# print(min_h_neg[0],unique_delets)
unique_delets_int = unique_delets[unique_delets < min_h_neg[0]]
indexer_lines_deletions_len.append(len(deletions))
indexr_uniq_ind.append([deletions])
else:
indexer_lines_deletions_len.append(0)
indexr_uniq_ind.append(-999)
index_line_true = min_h_neg[0] - len(unique_delets_int)
# print(index_line_true)
if index_line_true > 0 and min_h_neg[0] >= 2:
index_line_true = index_line_true
else:
index_line_true = min_h_neg[0]
indexer_lines.append(index_line_true)
if len(unique_delets_int) > 0:
for dd in range(len(unique_delets_int)):
indexes_to_delete.append(unique_delets_int[dd])
else:
indexer_lines.append(-999)
indexer_lines_deletions_len.append(-999)
indexr_uniq_ind.append(-999)
peaks_true = []
for m in range(len(peaks_neg_fin_t)):
if m in indexes_to_delete:
pass
else:
peaks_true.append(peaks_neg_fin_t[m])
return indexer_lines, peaks_true, arg_min_hor_sort, indexer_lines_deletions_len, indexr_uniq_ind
def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier):
#img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2)
img_p_in_ver=img_p_in_ver.astype(np.uint8)

@ -26,39 +26,6 @@ def find_contours_mean_y_diff(contours_main):
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
return np.mean(np.diff(np.sort(np.array(cy_main))))
def find_features_of_contours(contours_main):
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
return y_min_main, y_max_main, areas_main
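The moments-based centroids above (`m10/m00`, `m01/m00`) recur throughout the codebase; the `1e-32` term guards against division by zero for degenerate contours. A quick self-contained check:

import cv2
import numpy as np

# A 10x10 square contour; its centroid should be (5, 5).
cnt = np.array([[[0, 0]], [[10, 0]], [[10, 10]], [[0, 10]]], dtype=np.int32)
M = cv2.moments(cnt)
cx = M["m10"] / (M["m00"] + 1e-32)
cy = M["m01"] / (M["m00"] + 1e-32)
print(cx, cy)  # ~5.0 ~5.0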
def return_contours_of_interested_region_and_bounding_box(region_pre_p, pixel):
# pixels of images are identified by 5
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_imgs = return_parent_contours(contours_imgs, hiearchy)
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=0.0003)
boxes = []
for jj in range(len(contours_imgs)):
x, y, w, h = cv2.boundingRect(contours_imgs[jj])
boxes.append([int(x), int(y), int(w), int(h)])
return contours_imgs, boxes
def get_text_region_boxes_by_given_contours(contours):
@ -76,7 +43,6 @@ def get_text_region_boxes_by_given_contours(contours):
def filter_contours_area_of_image(image, contours, hirarchy, max_area, min_area):
found_polygons_early = list()
jv = 0
for c in contours:
if len(c) < 3: # A polygon cannot have less than 3 points
@ -89,23 +55,6 @@ def filter_contours_area_of_image(image, contours, hirarchy, max_area, min_area)
jv += 1
return found_polygons_early
def filter_contours_area_of_image_interiors(image, contours, hirarchy, max_area, min_area):
found_polygons_early = list()
jv = 0
for c in contours:
if len(c) < 3: # A polygon cannot have less than 3 points
continue
polygon = geometry.Polygon([point[0] for point in c])
area = polygon.area
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hirarchy[0][jv][3] != -1:
# print(c[0][0][1])
found_polygons_early.append(np.array([point for point in polygon.exterior.coords], dtype=np.uint))
jv += 1
return found_polygons_early
def filter_contours_area_of_image_tables(image, contours, hirarchy, max_area, min_area):
found_polygons_early = list()
@ -236,15 +185,6 @@ def return_contours_of_interested_textline(region_pre_p, pixel):
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=0.000000003)
return contours_imgs
def return_bonding_box_of_contours(cnts):
boxes_tot = []
for i in range(len(cnts)):
x, y, w, h = cv2.boundingRect(cnts[i])
box = [x, y, w, h]
boxes_tot.append(box)
return boxes_tot
def return_contours_of_image(image):
if len(image.shape) == 2:

@ -0,0 +1,24 @@
from PIL import Image
import numpy as np
from ocrd_models import OcrdExif
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor, imread
# from sbb_binarization
def cv2pil(img):
return Image.fromarray(img.astype('uint8'))
def pil2cv(img):
# from ocrd/workspace.py
color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR
pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img)
return cvtColor(pil_as_np_array, color_conversion)
def check_dpi(image_filename):
exif = OcrdExif(Image.open(image_filename))
print(exif.to_xml())
resolution = exif.resolution
if exif.resolutionUnit == 'cm':
resolution *= 2.54  # px/cm -> px/inch (1 inch = 2.54 cm)
return int(resolution)
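Note that `pil2cv` converts to OpenCV's BGR channel order, while `cv2pil` only wraps the array without converting back, so a naive round trip swaps the red and blue channels. A small sketch, assuming the helpers above are in scope:

from PIL import Image

img_pil = Image.new('RGB', (4, 4), color=(255, 0, 0))    # pure red
img_cv = pil2cv(img_pil)          # RGB -> BGR
assert img_cv[0, 0].tolist() == [0, 0, 255]
img_back = cv2pil(img_cv)         # no BGR -> RGB conversion here
assert img_back.getpixel((0, 0)) == (0, 0, 255)          # red and blue swapped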

@ -1,4 +1,3 @@
import matplotlib.pyplot as plt
import numpy as np
import cv2
from scipy.signal import find_peaks
@ -14,31 +13,8 @@ from .contour import (
)
from .is_nan import isNaN
from . import (
boosting_headers_by_longshot_region_segmentation,
crop_image_inside_box,
find_features_of_lines,
find_num_col,
find_num_col_by_vertical_lines,
find_num_col_deskew,
find_num_col_only_image,
isNaN,
otsu_copy,
otsu_copy_binary,
return_hor_spliter_by_index_for_without_verticals,
delete_seperator_around,
return_regions_without_seperators,
put_drop_out_from_only_drop_model,
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
check_any_text_region_in_model_one_is_main_or_header,
small_textlines_to_parent_adherence2,
order_and_id_of_texts,
order_of_regions,
implent_law_head_main_not_parallel,
return_hor_spliter_by_index,
combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new,
return_points_with_boundies,
find_number_of_columns_in_document,
return_boxes_of_images_by_order_of_reading_new,
)
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@ -1395,7 +1371,7 @@ def seperate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
return None, cont_final
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, slope_first, add_boxes_coor_into_textlines=False):
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
textline_mask = textline_mask.astype(np.uint8)
@ -1485,7 +1461,7 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
return contours_rotated_clean
def seperate_lines_new2(img_path, thetha, num_col, slope_region, dir_of_all, f_name):
def seperate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None):
if num_col == 1:
num_patches = int(img_path.shape[1] / 200.0)
@ -1536,7 +1512,7 @@ def seperate_lines_new2(img_path, thetha, num_col, slope_region, dir_of_all, f_n
sigma = 2
try:
slope_xline = return_deskew_slop(img_xline, sigma, dir_of_all=dir_of_all, f_name=f_name)
slope_xline = return_deskew_slop(img_xline, sigma, plotter=plotter)
except:
slope_xline = 0
@ -1593,29 +1569,10 @@ def seperate_lines_new2(img_path, thetha, num_col, slope_region, dir_of_all, f_n
# plt.show()
return img_patch_ineterst_revised
def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=None, f_name=None):
def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
if main_page and dir_of_all is not None:
plt.figure(figsize=(80,40))
plt.rcParams['font.size']='50'
plt.subplot(1,2,1)
plt.imshow(img_patch_org)
plt.subplot(1,2,2)
plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8)
plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60)
plt.ylabel('Height',fontsize=60)
plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
plt.gca().invert_yaxis()
plt.savefig(os.path.join(dir_of_all, f_name+'_density_of_textline.png'))
#print(np.max(img_patch_org.sum(axis=0)) ,np.max(img_patch_org.sum(axis=1)),'axislar')
#img_patch_org=resize_image(img_patch_org,int(img_patch_org.shape[0]*2.5),int(img_patch_org.shape[1]/2.5))
#print(np.max(img_patch_org.sum(axis=0)) ,np.max(img_patch_org.sum(axis=1)),'axislar2')
if main_page and plotter:
plotter.save_plot_of_textline_density(img_patch_org)
img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
@ -1647,53 +1604,23 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
#plt.show()
angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45])
#res=[]
#num_of_peaks=[]
#index_cor=[]
var_res=[]
#indexer=0
for rot in angels:
img_rot=rotate_image(img_resized,rot)
#plt.imshow(img_rot)
#plt.show()
img_rot[img_rot!=0]=1
#res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 ))
#neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 )
#print(var_spectrum,'var_spectrum')
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
##print(rot,var_spectrum,'var_spectrum')
#res_me=np.mean(neg_peaks)
#if res_me==0:
#res_me=1000000000000000000000
#else:
#pass
#res_num=len(neg_peaks)
except:
#res_me=1000000000000000000000
#res_num=0
var_spectrum=0
#if self.isNaN(res_me):
#pass
#else:
#res.append( res_me )
#var_res.append(var_spectrum)
#num_of_peaks.append( res_num )
#index_cor.append(indexer)
#indexer=indexer+1
var_res.append(var_spectrum)
#index_cor.append(indexer)
#indexer=indexer+1
try:
var_res=np.array(var_res)
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
@ -1701,32 +1628,19 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
angels=np.linspace(ang_int-22.5,ang_int+22.5,100)
#res=[]
#num_of_peaks=[]
#index_cor=[]
var_res=[]
for rot in angels:
img_rot=rotate_image(img_resized,rot)
##plt.imshow(img_rot)
##plt.show()
img_rot[img_rot!=0]=1
#res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 ))
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
except:
var_spectrum=0
var_res.append(var_spectrum)
try:
var_res=np.array(var_res)
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
@ -1745,9 +1659,6 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
#plt.imshow(img_rot)
#plt.show()
img_rot[img_rot!=0]=1
#res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 ))
#neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 )
#print(var_spectrum,'var_spectrum')
try:
@ -1759,51 +1670,30 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
var_res.append(var_spectrum)
if dir_of_all is not None:
#print('galdi?')
plt.figure(figsize=(60,30))
plt.rcParams['font.size']='50'
plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4)
plt.xlabel('angle',fontsize=50)
plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50)
plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$')
plt.legend(loc='best')
plt.savefig(os.path.join(dir_of_all,f_name+'_rotation_angle.png'))
if plotter:
plotter.save_plot_of_rotation_angle(angels, var_res)
try:
var_res=np.array(var_res)
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
early_slope_edge=11
if abs(ang_int)>early_slope_edge and ang_int<0:
angels=np.linspace(-90,-12,100)
var_res=[]
for rot in angels:
img_rot=rotate_image(img_resized,rot)
##plt.imshow(img_rot)
##plt.show()
img_rot[img_rot!=0]=1
#res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 ))
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
except:
var_spectrum=0
var_res.append(var_spectrum)
try:
var_res=np.array(var_res)
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
@ -1811,67 +1701,47 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
elif abs(ang_int)>early_slope_edge and ang_int>0:
angels=np.linspace(90,12,100)
var_res=[]
for rot in angels:
img_rot=rotate_image(img_resized,rot)
##plt.imshow(img_rot)
##plt.show()
img_rot[img_rot!=0]=1
#res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 ))
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
#print(indexer,'indexer')
except:
var_spectrum=0
var_res.append(var_spectrum)
try:
var_res=np.array(var_res)
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
else:
angels=np.linspace(-25,25,60)
var_res=[]
indexer=0
for rot in angels:
img_rot=rotate_image(img_resized,rot)
#plt.imshow(img_rot)
#plt.show()
img_rot[img_rot!=0]=1
#res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 ))
#neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 )
#print(var_spectrum,'var_spectrum')
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
except:
var_spectrum=0
var_res.append(var_spectrum)
try:
var_res=np.array(var_res)
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
#plt.plot(var_res)
#plt.show()
##plt.plot(mom3_res)
##plt.show()
#print(ang_int,'ang_int111')
@ -1888,20 +1758,14 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
##plt.imshow(img_rot)
##plt.show()
img_rot[img_rot!=0]=1
#res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 ))
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
except:
var_spectrum=0
var_res.append(var_spectrum)
try:
var_res=np.array(var_res)
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
@ -1918,7 +1782,6 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
##plt.imshow(img_rot)
##plt.show()
img_rot[img_rot!=0]=1
#res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0 ))
try:
var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 )
#print(indexer,'indexer')
@ -1926,12 +1789,8 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
var_spectrum=0
var_res.append(var_spectrum)
try:
var_res=np.array(var_res)
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
except:
ang_int=0
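Stripped of the commented-out experiments, the repeated loops above implement a coarse-to-fine angle search: score a coarse grid of rotations (`[-45, 0, 45, 90]`), then re-score a fine grid around the winner (`np.linspace(ang_int - 22.5, ang_int + 22.5, 100)`), with extra branches for steep slopes. A condensed sketch of the control flow:

import numpy as np

def best_angle(score, candidates):
    # score(angle) -> projection-profile variance; larger is better
    return candidates[int(np.argmax([score(a) for a in candidates]))]

def coarse_to_fine(score):
    coarse = best_angle(score, np.array([-45.0, 0.0, 45.0, 90.0]))
    return best_angle(score, np.linspace(coarse - 22.5, coarse + 22.5, 100))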

@ -0,0 +1,62 @@
from lxml import etree as ET
NAMESPACES = {}
NAMESPACES['page'] = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"
NAMESPACES['xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
NAMESPACES[None] = NAMESPACES['page']
def create_page_xml(imageFilename, height, width):
pcgts = ET.Element("PcGts", nsmap=NAMESPACES)
pcgts.set("{%s}schemaLocation" % NAMESPACES['xsi'], NAMESPACES['page'])
metadata = ET.SubElement(pcgts, "Metadata")
author = ET.SubElement(metadata, "Creator")
author.text = "SBB_QURATOR"
created = ET.SubElement(metadata, "Created")
created.text = "2019-06-17T18:15:12"
changetime = ET.SubElement(metadata, "LastChange")
changetime.text = "2019-06-17T18:15:12"
page = ET.SubElement(pcgts, "Page")
page.set("imageFilename", imageFilename)
page.set("imageHeight", str(height))
page.set("imageWidth", str(width))
page.set("type", "content")
page.set("readingDirection", "left-to-right")
page.set("textLineOrder", "top-to-bottom")
return pcgts, page
def add_textequiv(parent, text=''):
textequiv = ET.SubElement(parent, 'TextEquiv')
unireg = ET.SubElement(textequiv, 'Unicode')
unireg.text = text
def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals):
"""
XXX side-effect: extends id_of_marginalia
"""
region_order = ET.SubElement(page, 'ReadingOrder')
region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
region_order_sub.set('id', "ro357564684568544579089")
indexer_region = 0
for vj in order_of_texts:
name = "coord_text_%s" % vj
name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region))
name.set('regionRef', id_of_texts[vj])
indexer_region += 1
for vm in range(len(found_polygons_marginals)):
id_of_marginalia.append('r%s' % indexer_region)
name = "coord_text_%s" % indexer_region
name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region))
name.set('regionRef', 'r%s' % indexer_region)
indexer_region += 1
return id_of_marginalia
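A minimal usage sketch of the helpers above (filename and dimensions are placeholders):

from lxml import etree as ET

pcgts, page = create_page_xml("example.tif", 2000, 1500)
region = ET.SubElement(page, "TextRegion")
region.set("id", "r0")
add_textequiv(region, "placeholder text")
print(ET.tostring(pcgts, pretty_print=True).decode())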

@ -0,0 +1,272 @@
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
from pathlib import Path
import os.path
from .utils.xml import create_page_xml, add_textequiv, xml_reading_order
from ocrd_utils import getLogger
from lxml import etree as ET
import numpy as np
class EynollahXmlWriter():
def __init__(self, *, dir_out, image_filename, curved_line):
self.logger = getLogger('eynollah.writer')
self.dir_out = dir_out
self.image_filename = image_filename
self.image_filename_stem = Path(Path(image_filename).name).stem
self.curved_line = curved_line
self.scale_x = None # XXX set outside __init__
self.scale_y = None # XXX set outside __init__
self.height_org = None # XXX set outside __init__
self.width_org = None # XXX set outside __init__
def calculate_page_coords(self, cont_page):
self.logger.debug('enter calculate_page_coords')
points_page_print = ""
for _, contour in enumerate(cont_page[0]):
if len(contour) == 2:
points_page_print += str(int((contour[0]) / self.scale_x))
points_page_print += ','
points_page_print += str(int((contour[1]) / self.scale_y))
else:
points_page_print += str(int((contour[0][0]) / self.scale_x))
points_page_print += ','
points_page_print += str(int((contour[0][1] ) / self.scale_y))
points_page_print = points_page_print + ' '
return points_page_print[:-1]
def serialize_lines_in_marginal(self, marginal, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l):
for j in range(len(all_found_texline_polygons_marginals[marginal_idx])):
textline = ET.SubElement(marginal, 'TextLine')
textline.set('id', 'l%s' % id_indexer_l)
id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords')
add_textequiv(textline)
points_co = ''
for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])):
if not self.curved_line:
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0])/self.scale_y))
if self.curved_line and np.abs(slopes_marginals[marginal_idx]) <= 45:
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y))
elif self.curved_line and np.abs(slopes_marginals[marginal_idx]) > 45:
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
if l < len(all_found_texline_polygons_marginals[marginal_idx][j]) - 1:
points_co += ' '
coord.set('points',points_co)
return id_indexer_l
def serialize_lines_in_region(self, textregion, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, id_indexer_l):
self.logger.debug('enter serialize_lines_in_region')
for j in range(len(all_found_texline_polygons[region_idx])):
textline = ET.SubElement(textregion, 'TextLine')
textline.set('id', 'l%s' % id_indexer_l)
id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords')
add_textequiv(textline)
points_co = ''
for l in range(len(all_found_texline_polygons[region_idx][j])):
if not self.curved_line:
if len(all_found_texline_polygons[region_idx][j][l])==2:
textline_x_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0] + all_box_coord[region_idx][2] + page_coord[2]) / self.scale_x))
textline_y_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][1] + all_box_coord[region_idx][0] + page_coord[0]) / self.scale_y))
else:
textline_x_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0][0] + all_box_coord[region_idx][2] + page_coord[2]) / self.scale_x))
textline_y_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0][1] + all_box_coord[region_idx][0] + page_coord[0]) / self.scale_y))
points_co += str(textline_x_coord)
points_co += ','
points_co += str(textline_y_coord)
if self.curved_line and np.abs(slopes[region_idx]) <= 45:
if len(all_found_texline_polygons[region_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons[region_idx][j][l][1] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][1] + page_coord[0])/self.scale_y))
elif self.curved_line and np.abs(slopes[region_idx]) > 45:
if len(all_found_texline_polygons[region_idx][j][l])==2:
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0] + all_box_coord[region_idx][2]+page_coord[2])/self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons[region_idx][j][l][1] + all_box_coord[region_idx][0]+page_coord[0])/self.scale_y))
else:
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][0] + all_box_coord[region_idx][2]+page_coord[2])/self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][1] + all_box_coord[region_idx][0]+page_coord[0])/self.scale_y))
if l < len(all_found_texline_polygons[region_idx][j]) - 1:
points_co += ' '
coord.set('points',points_co)
return id_indexer_l
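Both serializers build the same PAGE-XML `Coords/@points` string: scale each vertex back into original-image coordinates and join the `x,y` pairs with spaces. A condensed sketch of that logic (a hypothetical helper, not part of the class):

def polygon_to_points(polygon, off_x, off_y, scale_x, scale_y):
    pairs = []
    for pt in polygon:
        # cv2 contours nest each vertex as [[x, y]]; plain pairs come as [x, y]
        x, y = pt if len(pt) == 2 else pt[0]
        pairs.append("%d,%d" % (int((x + off_x) / scale_x),
                                int((y + off_y) / scale_y)))
    return " ".join(pairs)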
def write_pagexml(self, pcgts):
self.logger.info("filename stem: '%s'", self.image_filename_stem)
tree = ET.ElementTree(pcgts)
tree.write(os.path.join(self.dir_out, self.image_filename_stem) + ".xml")
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page):
self.logger.debug('enter build_pagexml_no_full_layout')
# create the file structure
pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
page_print_sub = ET.SubElement(page, "Border")
coord_page = ET.SubElement(page_print_sub, "Coords")
coord_page.set('points', self.calculate_page_coords(cont_page))
id_of_marginalia = []
id_indexer = 0
id_indexer_l = 0
if len(found_polygons_text_region) > 0:
id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
for mm in range(len(found_polygons_text_region)):
textregion = ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
textregion.set('type', 'paragraph')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l)
add_textequiv(textregion)
for mm in range(len(found_polygons_marginals)):
marginal = ET.SubElement(page, 'TextRegion')
marginal.set('id', id_of_marginalia[mm])
marginal.set('type', 'marginalia')
coord_text = ET.SubElement(marginal, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
id_indexer_l = self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l)
        # Continue region ids after all text and marginal regions.
        id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals)
for mm in range(len(found_polygons_text_region_img)):
textregion = ET.SubElement(page, 'ImageRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
coord_text = ET.SubElement(textregion, 'Coords')
points_co = ''
for lmm in range(len(found_polygons_text_region_img[mm])):
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
if lmm < len(found_polygons_text_region_img[mm]) - 1:
points_co += ' '
coord_text.set('points', points_co)
return pcgts
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page):
self.logger.debug('enter build_pagexml_full_layout')
# create the file structure
pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
page_print_sub = ET.SubElement(page, "Border")
coord_page = ET.SubElement(page_print_sub, "Coords")
coord_page.set('points', self.calculate_page_coords(cont_page))
id_indexer = 0
id_indexer_l = 0
id_of_marginalia = []
if len(found_polygons_text_region) > 0:
id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
for mm in range(len(found_polygons_text_region)):
                textregion = ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
textregion.set('type', 'paragraph')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l)
add_textequiv(textregion)
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
if len(found_polygons_text_region_h) > 0:
for mm in range(len(found_polygons_text_region_h)):
                textregion = ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
textregion.set('type','header')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, id_indexer_l)
add_textequiv(textregion)
        if len(found_polygons_drop_capitals) > 0:
            # Continue region ids after the text, header and marginal regions.
            id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals)
            for mm in range(len(found_polygons_drop_capitals)):
                textregion = ET.SubElement(page, 'TextRegion')
                textregion.set('id', 'r%s' % id_indexer)
                id_indexer += 1
                textregion.set('type', 'drop-capital')
                coord_text = ET.SubElement(textregion, 'Coords')
                coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
                add_textequiv(textregion)
for mm in range(len(found_polygons_marginals)):
marginal = ET.SubElement(page, 'TextRegion')
            add_textequiv(marginal)
marginal.set('id', id_of_marginalia[mm])
marginal.set('type', 'marginalia')
coord_text = ET.SubElement(marginal, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
id_indexer_l = self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l)
        # Continue region ids after text, header, marginal and drop-capital regions.
        id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals)
for mm in range(len(found_polygons_text_region_img)):
            textregion = ET.SubElement(page, 'ImageRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord))
for mm in range(len(found_polygons_tables)):
textregion = ET.SubElement(page, 'TableRegion')
            textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_tables, mm, page_coord))
return pcgts
def calculate_polygon_coords(self, contour_list, i, page_coord):
self.logger.debug('enter calculate_polygon_coords')
coords = ''
for j in range(len(contour_list[i])):
if len(contour_list[i][j]) == 2:
coords += str(int((contour_list[i][j][0] + page_coord[2]) / self.scale_x))
coords += ','
coords += str(int((contour_list[i][j][1] + page_coord[0]) / self.scale_y))
else:
coords += str(int((contour_list[i][j][0][0] + page_coord[2]) / self.scale_x))
coords += ','
coords += str(int((contour_list[i][j][0][1] + page_coord[0]) / self.scale_y))
if j < len(contour_list[i]) - 1:
                coords += ' '
return coords
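
A minimal sketch (not part of the commit) of the Coords/@points string these helpers emit; the OpenCV contour layout and the (y_min, y_max, x_min, x_max) ordering of page_coord are assumptions read off the code above:

import numpy as np

contour = np.array([[[10, 20]], [[30, 40]], [[50, 60]]])  # OpenCV (N, 1, 2) points as (x, y)
page_coord = (0, 0, 0, 0)  # assumed (y_min, y_max, x_min, x_max) crop offsets
scale_x = scale_y = 1.0

points = ' '.join(
    '%d,%d' % (int((pt[0][0] + page_coord[2]) / scale_x),
               int((pt[0][1] + page_coord[0]) / scale_y))
    for pt in contour)
assert points == '10,20 30,40 50,60'  # PAGE-XML Coords/@points format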

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -1,34 +0,0 @@
from lxml import etree as ET
NAMESPACES = {}
NAMESPACES['page'] = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"
NAMESPACES['xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
NAMESPACES[None] = NAMESPACES['page']
def create_page_xml(imageFilename, height, width):
pcgts = ET.Element("PcGts", nsmap=NAMESPACES)
pcgts.set("{%s}schemaLocation" % NAMESPACES['xsi'], NAMESPACES['page'])
metadata = ET.SubElement(pcgts, "Metadata")
author = ET.SubElement(metadata, "Creator")
author.text = "SBB_QURATOR"
created = ET.SubElement(metadata, "Created")
created.text = "2019-06-17T18:15:12"
changetime = ET.SubElement(metadata, "LastChange")
changetime.text = "2019-06-17T18:15:12"
page = ET.SubElement(pcgts, "Page")
page.set("imageFilename", imageFilename)
page.set("imageHeight", str(height))
page.set("imageWidth", str(width))
page.set("type", "content")
page.set("readingDirection", "left-to-right")
page.set("textLineOrder", "top-to-bottom")
return pcgts, page
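
A hedged usage sketch for the helper above (the file is deleted in this commit; the same create_page_xml is imported from qurator.eynollah.utils.xml elsewhere in the diff):

from lxml import etree as ET

# Build an empty PAGE skeleton for a page image of known size.
pcgts, page = create_page_xml('kant_aufklaerung_1784_0020.tif', 2000, 1500)
assert page.get('imageHeight') == '2000'
print(ET.tostring(pcgts, pretty_print=True).decode('utf-8'))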

@ -10,12 +10,13 @@ setup(
author='Vahid Rezanezhad',
url='https://github.com/qurator-spk/eynollah',
license='Apache License 2.0',
packages=find_packages(),
namespace_packages=['qurator'],
packages=find_packages(exclude=['tests']),
install_requires=install_requires,
entry_points={
'console_scripts': [
'eynollah=sbb_newspapers_org_image.cli:main',
# 'ocrd-eynollah=eynollah.ocrd_cli:cli',
'eynollah=qurator.eynollah.cli:main',
# 'ocrd-eynollah=qurator.eynollah.ocrd_cli:cli',
]
},
)
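
A quick, hedged sanity check (not in the commit) that the renamed console-script target resolves after pip install:

# The 'eynollah' entry point now targets qurator.eynollah.cli:main;
# importing it requires no models.
from qurator.eynollah.cli import main as eynollah_main
assert callable(eynollah_main)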

@ -0,0 +1,54 @@
# pylint: disable=unused-import
from os.path import dirname, realpath
from os import chdir
import sys
import logging
import io
import collections
from unittest import TestCase as VanillaTestCase, skip, main as unittests_main
import pytest
from ocrd_utils import disableLogging, initLogging
def main(fn=None):
if fn:
sys.exit(pytest.main([fn]))
else:
unittests_main()
class TestCase(VanillaTestCase):
@classmethod
def setUpClass(cls):
chdir(dirname(realpath(__file__)) + '/..')
def setUp(self):
disableLogging()
initLogging()
class CapturingTestCase(TestCase):
"""
A TestCase that needs to capture stderr/stdout and invoke click CLI.
"""
@pytest.fixture(autouse=True)
def _setup_pytest_capfd(self, capfd):
self.capfd = capfd
def invoke_cli(self, cli, args):
"""
        Substitution for click.CliRunner.invoke that works nicely
        with unittest/pytest capturing of stdout/stderr.
"""
self.capture_out_err() # XXX snapshot just before executing the CLI
code = 0
sys.argv[1:] = args # XXX necessary because sys.argv reflects pytest args not cli args
try:
cli.main(args=args)
except SystemExit as e:
code = e.code
out, err = self.capture_out_err()
return code, out, err
def capture_out_err(self):
return self.capfd.readouterr()
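
A hedged illustration of how CapturingTestCase.invoke_cli might be used with a click command; the hello command and test below are hypothetical:

import click
from tests.base import CapturingTestCase, main

@click.command()
def hello():
    click.echo('hello')

class TestHello(CapturingTestCase):
    def test_hello(self):
        # invoke_cli returns the exit code plus captured stdout/stderr
        code, out, err = self.invoke_cli(hello, [])
        assert code == 0
        assert 'hello' in out

if __name__ == '__main__':
    main(__file__)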

@ -0,0 +1,10 @@
from pathlib import Path
from qurator.eynollah.utils.pil_cv2 import check_dpi
from tests.base import main
def test_dpi():
fpath = Path(__file__).parent.joinpath('resources', 'kant_aufklaerung_1784_0020.tif')
assert 300 == check_dpi(str(fpath))
if __name__ == '__main__':
main(__file__)

@ -0,0 +1,24 @@
from os import environ
from pathlib import Path
from ocrd_utils import pushd_popd
from tests.base import CapturingTestCase as TestCase, main
from qurator.eynollah.cli import main as eynollah_cli
testdir = Path(__file__).parent.resolve()
EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve()))
class TestEynollahRun(TestCase):
def test_full_run(self):
with pushd_popd(tempdir=True) as tempdir:
code, out, err = self.invoke_cli(eynollah_cli, [
'-m', EYNOLLAH_MODELS,
'-i', str(testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')),
'-o', tempdir
])
print(code, out, err)
assert not code
if __name__ == '__main__':
main(__file__)
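
The module reads EYNOLLAH_MODELS at import time, so a custom model directory must be set before pytest collects the file; a hedged sketch (the path is hypothetical):

import os
os.environ['EYNOLLAH_MODELS'] = '/path/to/models_eynollah'  # hypothetical model dir

import pytest
pytest.main(['tests/test_run.py'])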

@ -1,7 +1,7 @@
def test_utils_import():
import sbb_newspapers_org_image.utils
import sbb_newspapers_org_image.utils.contour
import sbb_newspapers_org_image.utils.drop_capitals
import sbb_newspapers_org_image.utils.drop_capitals
import sbb_newspapers_org_image.utils.is_nan
import sbb_newspapers_org_image.utils.rotate
import qurator.eynollah.utils
import qurator.eynollah.utils.contour
    import qurator.eynollah.utils.drop_capitals
import qurator.eynollah.utils.is_nan
import qurator.eynollah.utils.rotate

@ -1,5 +1,5 @@
from lxml import etree as ET
from sbb_newspapers_org_image.utils.xml import create_page_xml, NAMESPACES
from qurator.eynollah.utils.xml import create_page_xml, NAMESPACES
def tostring(el):
return ET.tostring(el).decode('utf-8')
