Merge pull request #19 from qurator-spk/refactor-cntd

Refactor cntd
2025-07-18 23:39:54 +02:00 · 2021-03-01 07:40:09 -05:00 · 2021-03-01 07:40:09 -05:00 · 932c3fb479
commit 932c3fb479
parent 8ab50a5486 58c4403e13
30 changed files with 2526 additions and 7352 deletions
--- a/.github/workflows/test-eynollah.yml
+++ b/.github/workflows/test-eynollah.yml
@ -31,5 +31,6 @@ jobs:
      run: |
        python -m pip install --upgrade pip
        pip install .
        pip install -r requirements-test.txt
    - name: Test with pytest
-      run: echo success # make test
+      run: make test
--- a/3
+++ b/3
@ -1,3 +1,6 @@
 EYNOLLAH_MODELS ?= $(PWD)/models_eynollah
 export EYNOLLAH_MODELS
 # BEGIN-EVAL makefile-parser --make-help Makefile
 help:
--- a/README.md
+++ b/README.md
@ -88,6 +88,21 @@ eynollah \
 The tool accepts, and works better on, original images (RGB format) than on binarized images.
 ### `--full-layout` vs `--no-full-layout`
 Here are the differences in the elements detected, depending on the `--full-layout`/`--no-full-layout` command line flags:
 |                          | `--full-layout` | `--no-full-layout` |
 | ---                      | ---             | ---                |
 | reading order            | x               | x                  |
 | header regions           | x               | -                  |
 | text regions             | x               | x                  |
 | text regions / text line | x               | x                  |
 | drop-capitals            | x               | -                  |
 | marginals                | x               | x                  |
 | marginals / text line    | x               | x                  |
 | image region             | x               | x                  |
 ### How to use
 First of all, this model makes use of up to 9 trained models which are responsible for different operations like size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection. But this does not mean that all 9 models are always required for every document. Based on the document characteristics and parameters specified, different scenarios can be applied.
--- a/sbb_newspapers_org_image/.gitkeep
+++ b/sbb_newspapers_org_image/.gitkeep
--- a/qurator/init.py
+++ b/qurator/init.py
@ -0,0 +1 @@
 # Register this package as a pkg_resources-style namespace package.
 __import__("pkg_resources").declare_namespace(__name__)
--- a/sbb_newspapers_org_image/init.py
+++ b/sbb_newspapers_org_image/init.py
--- a/sbb_newspapers_org_image/cli.py
+++ b/sbb_newspapers_org_image/cli.py
@ -1,16 +1,23 @@
 import sys
 import click
-from sbb_newspapers_org_image.eynollah import eynollah
+from ocrd_utils import initLogging, setOverrideLogLevel
 from qurator.eynollah.eynollah import Eynollah
@click.command()
@click.option(
-    "--image", "-i", help="image filename", type=click.Path(exists=True, dir_okay=False)
+    "--image",
    "-i",
    help="image filename",
    type=click.Path(exists=True, dir_okay=False),
    required=True,
 )
@click.option(
    "--out",
    "-o",
    help="directory to write output xml data",
    type=click.Path(exists=True, file_okay=False),
    required=True,
 )
@click.option(
    "--model",
@ -43,35 +50,47 @@ from sbb_newspapers_org_image.eynollah import eynollah
    type=click.Path(exists=True, file_okay=False),
 )
@click.option(
-    "--allow_enhancement",
+    "--enable-plotting/--disable-plotting",
-    "-ae",
+    "-ep/-noep",
    is_flag=True,
    help="If set, will plot intermediary files and images",
 )
@click.option(
    "--allow-enhancement/--no-allow-enhancement",
    "-ae/-noae",
    is_flag=True,
    help="if this parameter set to true, this tool would check that input image need resizing and enhancement or not. If so output of resized and enhanced image and corresponding layout data will be written in out directory",
 )
@click.option(
-    "--curved_line",
+    "--curved-line/--no-curvedline",
-    "-cl",
+    "-cl/-nocl",
    is_flag=True,
    help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectabgle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.",
 )
@click.option(
-    "--full_layout",
+    "--full-layout/--no-full-layout",
-    "-fl",
+    "-fl/-nofl",
    is_flag=True,
    help="if this parameter set to true, this tool will try to return all elements of layout.",
 )
@click.option(
-    "--allow_scaling",
+    "--allow_scaling/--no-allow-scaling",
-    "-as",
+    "-as/-noas",
    is_flag=True,
    help="if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection",
 )
@click.option(
-    "--headers_off",
+    "--headers-off/--headers-on",
-    "-ho",
+    "-ho/-noho",
    is_flag=True,
    help="if this parameter set to true, this tool would ignore headers role in reading order",
 )
@click.option(
    "--log-level",
    "-l",
    type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
    help="Override log level globally to this",
 )
 def main(
    image,
    out,
@ -80,13 +99,24 @@ def main(
    save_layout,
    save_deskewed,
    save_all,
    enable_plotting,
    allow_enhancement,
    curved_line,
    full_layout,
    allow_scaling,
    headers_off,
    log_level
 ):
-    eynollah(
+    if log_level:
        setOverrideLogLevel(log_level)
    initLogging()
    if not enable_plotting and (save_layout or save_deskewed or save_all or save_images):
        print("Error: You used one of -sl, -sd, -sa or -si but did not enable plotting with -ep")
        sys.exit(1)
    elif enable_plotting and not (save_layout or save_deskewed or save_all or save_images):
        print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa or -si")
        sys.exit(1)
    eynollah = Eynollah(
        image,
        None,
        out,
@ -95,13 +125,15 @@ def main(
        save_layout,
        save_deskewed,
        save_all,
        enable_plotting,
        allow_enhancement,
        curved_line,
        full_layout,
        allow_scaling,
        headers_off,
-    ).run()
+    )
-
+    pcgts = eynollah.run()
    eynollah.writer.write_pagexml(pcgts)
 if __name__ == "__main__":
    main()
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
--- a/qurator/eynollah/plot.py
+++ b/qurator/eynollah/plot.py
@ -0,0 +1,169 @@
 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import numpy as np
 import os.path
 import cv2
 from scipy.ndimage import gaussian_filter1d
 from .utils import crop_image_inside_box
 from .utils.rotate import rotyate_image_different
 from .utils.resize import resize_image
 class EynollahPlotter():
    """
    Class collecting all the plotting and image writing methods

    Each public method writes into one of the configured output directories
    and silently does nothing when the directory it targets is ``None``.
    Every figure opened by a plotting method is closed again once it has
    been saved, so repeated calls do not accumulate open pyplot figures.
    """
    def __init__(
        self,
        *,
        dir_of_all,
        dir_of_deskewed,
        dir_of_layout,
        dir_of_cropped_images,
        image_filename,
        image_filename_stem,
        image_org=None,
        scale_x=1,
        scale_y=1,
    ):
        """
        Store the output directories and image metadata (keyword-only).

        ``image_filename_stem`` is the prefix of every file name written.
        ``image_org``, ``scale_x`` and ``scale_y`` may be assigned after
        construction (see the TODO below).
        """
        self.dir_of_all = dir_of_all
        self.dir_of_layout = dir_of_layout
        self.dir_of_cropped_images = dir_of_cropped_images
        self.dir_of_deskewed = dir_of_deskewed
        self.image_filename = image_filename
        self.image_filename_stem = image_filename_stem
        # XXX TODO hacky these cannot be set at init time
        self.image_org = image_org
        self.scale_x = scale_x
        self.scale_y = scale_y
    def _save_class_map_plot(self, out_dir, suffix, class_map, labels, label_values, image_page, side_by_side):
        """
        Render ``class_map`` with a legend and save it as ``<stem><suffix>``.

        ``labels[k]`` is the legend text for the pixel value
        ``label_values[k]``. With ``side_by_side`` the figure is twice as
        wide, ``image_page`` is drawn in the left panel and the legend uses
        font size 60 instead of 40. Nothing happens when ``out_dir`` is None.
        """
        if out_dir is None:
            return
        values = np.unique(class_map[:, :])
        known_values = np.asarray(label_values)
        fig = plt.figure(figsize=(80, 40) if side_by_side else (40, 40))
        try:
            # NOTE: this changes the process-wide matplotlib default font size.
            plt.rcParams["font.size"] = "40"
            if side_by_side:
                plt.subplot(1, 2, 1)
                plt.imshow(image_page)
                plt.subplot(1, 2, 2)
            im = plt.imshow(class_map[:, :])
            # One legend entry per value actually present in the map; a value
            # missing from label_values raises IndexError, as before.
            patches = [
                mpatches.Patch(
                    color=im.cmap(im.norm(value)),
                    label="{l}".format(l=labels[int(np.where(known_values == value)[0][0])]),
                )
                for value in values
            ]
            plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60 if side_by_side else 40)
            plt.savefig(os.path.join(out_dir, self.image_filename_stem + suffix))
        finally:
            # Release the figure; otherwise pyplot keeps it alive indefinitely.
            plt.close(fig)
    def save_plot_of_layout_main(self, text_regions_p, image_page):
        """Save the main-layout class map to ``dir_of_layout`` (``image_page`` is unused)."""
        # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
        pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia']
        self._save_class_map_plot(self.dir_of_layout, "_layout_main.png", text_regions_p, pixels, [0, 1, 2, 3, 4], image_page, False)
    def save_plot_of_layout_main_all(self, text_regions_p, image_page):
        """Save the page image next to the main-layout class map in ``dir_of_all``."""
        # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
        pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia']
        self._save_class_map_plot(self.dir_of_all, "_layout_main_and_page.png", text_regions_p, pixels, [0, 1, 2, 3, 4], image_page, True)
    def save_plot_of_layout(self, text_regions_p, image_page):
        """Save the full-layout class map to ``dir_of_layout`` (``image_page`` is unused)."""
        # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
        pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
        self._save_class_map_plot(self.dir_of_layout, "_layout.png", text_regions_p, pixels, [0, 1, 2, 8, 4, 5, 6], image_page, False)
    def save_plot_of_layout_all(self, text_regions_p, image_page):
        """Save the page image next to the full-layout class map in ``dir_of_all``."""
        # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
        pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
        self._save_class_map_plot(self.dir_of_all, "_layout_and_page.png", text_regions_p, pixels, [0, 1, 2, 8, 4, 5, 6], image_page, True)
    def save_plot_of_textlines(self, textline_mask_tot_ea, image_page):
        """Save the page image next to the textline mask in ``dir_of_all``."""
        self._save_class_map_plot(self.dir_of_all, "_textline_and_page.png", textline_mask_tot_ea, ["Background", "Textlines"], [0, 1], image_page, True)
    def save_deskewed_image(self, slope_deskew):
        """
        Write ``self.image_org`` to ``dir_of_all`` and its rotated version
        (rotated by ``slope_deskew``) to ``dir_of_deskewed``; each write is
        skipped independently when its directory is None.
        """
        if self.dir_of_all is not None:
            cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_org.png"), self.image_org)
        if self.dir_of_deskewed is not None:
            img_rotated = rotyate_image_different(self.image_org, slope_deskew)
            cv2.imwrite(os.path.join(self.dir_of_deskewed, self.image_filename_stem + "_deskewed.png"), img_rotated)
    def save_page_image(self, image_page):
        """Write ``image_page`` as ``<stem>_page.png`` into ``dir_of_all``."""
        if self.dir_of_all is not None:
            cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page)
    def save_plot_of_textline_density(self, img_patch_org):
        """
        Save the patch next to its Gaussian-smoothed (sigma 3) row-sum
        profile as ``<stem>_density_of_textline.png`` in ``dir_of_all``.
        """
        if self.dir_of_all is None:
            return
        fig = plt.figure(figsize=(80,40))
        try:
            plt.rcParams['font.size']='50'
            plt.subplot(1,2,1)
            plt.imshow(img_patch_org)
            plt.subplot(1,2,2)
            # Smoothed profile computed once and reused for the curve and the ticks.
            density = gaussian_filter1d(img_patch_org.sum(axis=1), 3)
            plt.plot(density, np.array(range(len(density))), linewidth=8)
            plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60)
            plt.ylabel('Height',fontsize=60)
            plt.yticks([0, len(density)])
            plt.gca().invert_yaxis()
            plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_density_of_textline.png'))
        finally:
            plt.close(fig)
    def save_plot_of_rotation_angle(self, angels, var_res):
        """
        Plot ``var_res`` against ``angels``, mark the entry with the largest
        ``var_res`` and save as ``<stem>_rotation_angle.png`` in ``dir_of_all``.
        """
        if self.dir_of_all is None:
            return
        fig = plt.figure(figsize=(60,30))
        try:
            plt.rcParams['font.size']='50'
            plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4)
            plt.xlabel('angle',fontsize=50)
            plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50)
            best = np.argmax(var_res)
            plt.plot(angels[best], var_res[best], '*', markersize=50, label='Angle of deskewing=' + str("{:.2f}".format(angels[best])) + r'$\degree$')
            plt.legend(loc='best')
            plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_rotation_angle.png'))
        finally:
            plt.close(fig)
    def write_images_into_directory(self, img_contoures, image_page):
        """
        Crop the bounding box of every contour out of ``image_page``, resize
        the crop by ``1/scale_y`` and ``1/scale_x`` and write it as
        ``<stem>_<index>.jpg`` into ``dir_of_cropped_images``.
        """
        if self.dir_of_cropped_images is None:
            return
        for index, cont_ind in enumerate(img_contoures):
            x, y, w, h = cv2.boundingRect(cont_ind)
            croped_page, _page_coord = crop_image_inside_box([x, y, w, h], image_page)
            croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x))
            path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg")
            cv2.imwrite(path, croped_page)
--- a/sbb_newspapers_org_image/utils/init.py
+++ b/sbb_newspapers_org_image/utils/init.py
@ -299,24 +299,6 @@ def crop_image_inside_box(box, img_org_copy):
    image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]]
    return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]]
 def otsu_copy(img):
    """
    Binarize an image with Otsu thresholding.

    The first three channels of ``img`` are each thresholded with
    ``cv2.THRESH_BINARY + cv2.THRESH_OTSU``. The returned array is a new
    ``np.zeros(img.shape)`` array whose channels 0, 1 and 2 all hold the
    binarization of channel 0.
    """
    result = np.zeros(img.shape)
    otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    channels = [img[:, :, idx] for idx in range(3)]
    binarized = [cv2.threshold(channel, 0, 255, otsu_mode)[1] for channel in channels]
    # NOTE(review): the thresholds of channels 1 and 2 are computed but never
    # used; only the channel-0 result is copied out. Confirm this is intended.
    for idx in range(3):
        result[:, :, idx] = binarized[0]
    return result
 def otsu_copy_binary(img):
    img_r = np.zeros((img.shape[0], img.shape[1], 3))
    img1 = img[:, :, 0]
@ -373,241 +355,41 @@ def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregio
 def find_num_col_deskew(regions_without_seperators, sigma_, multiplier=3.8):
    """
    Return the standard deviation of the smoothed row-sum profile.

    ``regions_without_seperators`` is summed along axis 1, the resulting 1-D
    profile is smoothed with a Gaussian of width ``sigma_`` and ``np.std`` of
    the smoothed profile is returned.

    ``multiplier`` is not used; it is kept so existing callers keep working.
    """
    row_profile = regions_without_seperators[:, :].sum(axis=1)
    smoothed = gaussian_filter1d(row_profile, sigma_)
    return np.std(smoothed)
 def return_hor_spliter_by_index_for_without_verticals(peaks_neg_fin_t, x_min_hor_some, x_max_hor_some):
    """
    Relate horizontal spans to a list of peak positions and drop the peaks
    that a span covers.

    The spans ``(x_min_hor_some[k], x_max_hor_some[k])`` are processed in
    order of increasing ``x_min``. For each span the nearest peak below
    ``x_min`` and the nearest peak above ``x_max`` are located (the last peak
    is shifted by -20 for the upper comparison). All peak indexes strictly
    between those two are marked for deletion.

    Parameters:
        peaks_neg_fin_t: 1-D numpy array of peak positions.
        x_min_hor_some: lower x bound of each span.
        x_max_hor_some: numpy array with the upper x bound of each span.

    Returns a 5-tuple:
        indexer_lines: per span, the (deletion-adjusted) index of the peak
            below the span, or -999 when no bounding pair of peaks exists.
        peaks_true: the peaks that were not marked for deletion.
        arg_min_hor_sort: the argsort of ``x_min_hor_some``.
        indexer_lines_deletions_len: per span, the number of covered peaks
            (0 if none, -999 when no bounding pair exists).
        indexr_uniq_ind: per span, ``[covered_indexes]`` or -999.
    """
    arg_min_hor_sort = np.argsort(x_min_hor_some)
    x_min_hor_some_sort = np.sort(x_min_hor_some)
    x_max_hor_some_sort = x_max_hor_some[arg_min_hor_sort]
    arg_minmax = np.array(range(len(peaks_neg_fin_t)))
    indexer_lines = []
    indexes_to_delete = []
    indexer_lines_deletions_len = []
    indexr_uniq_ind = []
    for i, x_min in enumerate(x_min_hor_some_sort):
        min_h = peaks_neg_fin_t - x_min
        max_h = peaks_neg_fin_t - x_max_hor_some_sort[i]
        # Shift the last peak by 20 before testing whether it lies above x_max.
        max_h[len(max_h) - 1] = max_h[len(max_h) - 1] - 20
        # Closest peak below x_min: the largest of the negative distances.
        # An explicit emptiness check replaces the former bare ``except``.
        below = min_h < 0
        min_h_neg_n = min_h[below]
        if len(min_h_neg_n) > 0:
            min_h_neg = [arg_minmax[below][np.argmax(min_h_neg_n)]]
        else:
            min_h_neg = []
        # Closest peak above x_max: the smallest of the positive distances.
        above = max_h > 0
        max_h_neg_n = max_h[above]
        if len(max_h_neg_n) > 0:
            max_h_neg = [arg_minmax[above][np.argmin(max_h_neg_n)]]
        else:
            max_h_neg = []
        if len(min_h_neg) > 0 and len(max_h_neg) > 0:
            deletions = list(range(min_h_neg[0] + 1, max_h_neg[0]))
            unique_delets_int = []
            if len(deletions) > 0:
                indexes_to_delete.extend(deletions)
                # Previously deleted indexes that lie left of the lower peak
                # shift that peak's position in the reduced list.
                unique_delets = np.unique(indexes_to_delete)
                unique_delets_int = unique_delets[unique_delets < min_h_neg[0]]
                indexer_lines_deletions_len.append(len(deletions))
                indexr_uniq_ind.append([deletions])
            else:
                indexer_lines_deletions_len.append(0)
                indexr_uniq_ind.append(-999)
            index_line_true = min_h_neg[0] - len(unique_delets_int)
            # The adjusted index is only kept when it is positive and the
            # lower peak index is at least 2.
            if not (index_line_true > 0 and min_h_neg[0] >= 2):
                index_line_true = min_h_neg[0]
            indexer_lines.append(index_line_true)
            indexes_to_delete.extend(unique_delets_int)
        else:
            indexer_lines.append(-999)
            indexer_lines_deletions_len.append(-999)
            indexr_uniq_ind.append(-999)
    deleted = set(indexes_to_delete)
    peaks_true = [peak for m, peak in enumerate(peaks_neg_fin_t) if m not in deleted]
    return indexer_lines, peaks_true, arg_min_hor_sort, indexer_lines_deletions_len, indexr_uniq_ind
 def find_num_col(regions_without_seperators, multiplier=3.8):
    regions_without_seperators_0 = regions_without_seperators[:, :].sum(axis=0)
    ##plt.plot(regions_without_seperators_0)
    ##plt.show()
    sigma_ = 35  # 70#35
    meda_n_updown = regions_without_seperators_0[len(regions_without_seperators_0) :: -1]
    first_nonzero = next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)
    last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0)
    # print(last_nonzero)
    # print(isNaN(last_nonzero))
    # last_nonzero=0#halalikh
    last_nonzero = len(regions_without_seperators_0) - last_nonzero
    y = regions_without_seperators_0  # [first_nonzero:last_nonzero]
    y_help = np.zeros(len(y) + 20)
    y_help[10 : len(y) + 10] = y
    x = np.array(range(len(y)))
    zneg_rev = -y_help + np.max(y_help)
    zneg = np.zeros(len(zneg_rev) + 20)
    zneg[10 : len(zneg_rev) + 10] = zneg_rev
    z = gaussian_filter1d(y, sigma_)
    zneg = gaussian_filter1d(zneg, sigma_)
    peaks_neg, _ = find_peaks(zneg, height=0)
    peaks, _ = find_peaks(z, height=0)
    peaks_neg = peaks_neg - 10 - 10
    last_nonzero = last_nonzero - 100
    first_nonzero = first_nonzero + 200
    peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)]
    peaks = peaks[(peaks > 0.06 * regions_without_seperators.shape[1]) & (peaks < 0.94 * regions_without_seperators.shape[1])]
    peaks_neg = peaks_neg[(peaks_neg > 370) & (peaks_neg < (regions_without_seperators.shape[1] - 370))]
    # print(peaks)
    interest_pos = z[peaks]
    interest_pos = interest_pos[interest_pos > 10]
    # plt.plot(z)
    # plt.show()
    interest_neg = z[peaks_neg]
@ -621,9 +403,7 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
    min_peaks_neg = 0  # np.min(interest_neg)
    # print(np.min(interest_pos),np.max(interest_pos),np.max(interest_pos)/np.min(interest_pos),'minmax')
    # $print(min_peaks_pos)
    dis_talaei = (min_peaks_pos - min_peaks_neg) / multiplier
    # print(interest_pos)
    grenze = min_peaks_pos - dis_talaei  # np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
    # print(interest_neg,'interest_neg')
@ -650,15 +430,11 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
        if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ((peaks_neg_fin[0] + 200) < p_m and peaks_neg_fin[1] < p_m) or ((peaks_neg_fin[0] - 200) > p_m and peaks_neg_fin[1] > p_m):
            num_col = 1
            peaks_neg_fin = []
        else:
            pass
    if num_col == 2:
        if (peaks_neg_fin[0] > p_g_u) or (peaks_neg_fin[0] < p_g_l):
            num_col = 1
            peaks_neg_fin = []
        else:
            pass
    ##print(len(peaks_neg_fin))
@ -673,7 +449,7 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
    for i in range(len(peaks_neg_fin)):
        if i == 0:
            forest.append(peaks_neg_fin[i])
-        if i < (len(peaks_neg_fin) - 1):
+        if i < len(peaks_neg_fin) - 1:
            if diff_peaks[i] <= cut_off:
                forest.append(peaks_neg_fin[i + 1])
            if diff_peaks[i] > cut_off:
@ -687,7 +463,7 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
            if not isNaN(forest[np.argmin(z[forest])]):
                peaks_neg_true.append(forest[np.argmin(z[forest])])
-    num_col = (len(peaks_neg_true)) + 1
+    num_col = len(peaks_neg_true) + 1
    p_l = 0
    p_u = len(y) - 1
    p_m = int(len(y) / 2.0)
@ -706,15 +482,11 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
            peaks_neg_true = [peaks_neg_true[0]]
        elif (peaks_neg_true[1] < p_g_u and peaks_neg_true[1] > p_g_l) and (peaks_neg_true[0] < p_quarter):
            peaks_neg_true = [peaks_neg_true[1]]
        else:
            pass
    if num_col == 2:
        if (peaks_neg_true[0] > p_g_u) or (peaks_neg_true[0] < p_g_l):
            num_col = 1
            peaks_neg_true = []
        else:
            pass
    diff_peaks_annormal = diff_peaks[diff_peaks < 360]
@ -732,9 +504,7 @@ def find_num_col(regions_without_seperators, multiplier=3.8):
                else:
                    peaks_neg_fin_new.append(peaks_neg_fin[ii + 1])
-            elif (ii - 1) in arg_help_ann:
+            elif (ii - 1) not in arg_help_ann:
                pass
            else:
                peaks_neg_fin_new.append(peaks_neg_fin[ii])
    else:
        peaks_neg_fin_new = peaks_neg_fin
@ -948,28 +718,6 @@ def find_num_col_by_vertical_lines(regions_without_seperators, multiplier=3.8):
    # print(peaks,'peaksnew')
    return peaks
 def delete_seperator_around(spliter_y, peaks_neg, image_by_region):
    """
    Zero out pixels labelled 6 or 7 in a narrow column window around peaks.

    For every row band ``spliter_y[i] .. spliter_y[i + 1]`` and every interior
    entry ``p`` of ``peaks_neg[i]`` (the first and last entries are skipped),
    the columns ``p - int(p / 20) .. p + int(p / 20)`` are inspected:

    * 3-D input: channel 0 is set to 0 wherever channel 0, 1 or 2 equals
      6 or 7. Only channel 0 is written.
    * 2-D input: pixels equal to 6 or 7 are set to 0.

    Row bounds are always cast with ``int`` so float split positions work
    for every assignment (two of the original statements omitted the cast).

    ``image_by_region`` is modified in place and also returned.
    """
    # format of subboxes box=[x1, x2 , y1, y2]
    is_multichannel = len(image_by_region.shape) == 3
    for i in range(len(spliter_y) - 1):
        for j in range(1, len(peaks_neg[i]) - 1):
            rows = slice(int(spliter_y[i]), int(spliter_y[i + 1]))
            peak = peaks_neg[i][j]
            margin = int(1.0 / 20.0 * peak)
            cols = slice(peak - margin, peak + margin)
            if is_multichannel:
                # Collect the 6/7 hits of the three channels before writing;
                # writes touch channel 0 only and can never create a 6 or 7.
                hit = None
                for channel in (0, 1, 2):
                    window = image_by_region[rows, cols, channel]
                    channel_hit = (window == 6) | (window == 7)
                    hit = channel_hit if hit is None else (hit | channel_hit)
                image_by_region[rows, cols, 0][hit] = 0
            else:
                window = image_by_region[rows, cols]
                window[(window == 6) | (window == 7)] = 0
    return image_by_region
 def return_regions_without_seperators(regions_pre):
    kernel = np.ones((5, 5), np.uint8)
    regions_without_seperators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1
@ -1432,166 +1180,6 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
    return final_indexers_sorted, matrix_of_orders, final_types, final_index_type
 def implent_law_head_main_not_parallel(text_regions):
    """Relabel main-text and header regions that share the same vertical band.

    Contours are extracted for label 1 (main text) and label 2 (header). For
    every contour, the main and header contours whose centroid y lies strictly
    inside that contour's vertical extent are collected. If at least one
    contour of the *other* class is found, the summed areas of both groups
    are compared and the smaller group is painted with the label of the
    larger one (on a tie, the main contours are painted as header, label 2).

    text_regions: label map indexed as ``text_regions[:, :]``; it is passed to
        ``cv2.fillPoly`` and the result is returned.
        NOTE(review): presumably a 2-D integer array -- confirm with callers.

    Returns the (re-painted) label map.
    """
    # print(text_regions.shape)
    text_indexes = [1, 2]  # 1: main text , 2: header , 3: comments
    for t_i in text_indexes:
        # binary 0/255 mask of the current label, expanded to 3 channels so it
        # can go through the BGR->gray conversion below
        textline_mask = text_regions[:, :] == t_i
        textline_mask = textline_mask * 255.0
        textline_mask = textline_mask.astype(np.uint8)
        textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2)
        kernel = np.ones((5, 5), np.uint8)  # not used further in this function
        # print(type(textline_mask),np.unique(textline_mask),textline_mask.shape)
        imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY)
        ret, thresh = cv2.threshold(imgray, 0, 255, 0)
        if t_i == 1:
            # NOTE(review): two return values assumes the OpenCV 2/4 findContours API -- confirm
            contours_main, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            # print(type(contours_main))
            areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
            M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
            # centroids; the tiny epsilon avoids division by zero for zero-area contours
            cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
            cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
            # only the y extents and cy_* are used below; cx_* and x extents are computed but unused
            x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
            x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
            y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
            y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
            # print(contours_main[0],np.shape(contours_main[0]),contours_main[0][:,0,0])
        elif t_i == 2:
            contours_header, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            # print(type(contours_header))
            areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))])
            M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))]
            cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
            cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))]
            x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
            x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))])
            y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
            y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))])
    # 1-based contour numbers: multiplying by a boolean mask and keeping the
    # positive entries yields the numbers of the selected contours
    args = np.array(range(1, len(cy_header) + 1))
    args_main = np.array(range(1, len(cy_main) + 1))
    # pass 1: look at the vertical band of every main-text contour
    for jj in range(len(contours_main)):
        headers_in_main = [(cy_header > y_min_main[jj]) & ((cy_header < y_max_main[jj]))]
        mains_in_main = [(cy_main > y_min_main[jj]) & ((cy_main < y_max_main[jj]))]
        args_log = args * headers_in_main
        res = args_log[args_log > 0]
        res_true = res - 1  # back to 0-based header indices
        args_log_main = args_main * mains_in_main
        res_main = args_log_main[args_log_main > 0]
        res_true_main = res_main - 1  # back to 0-based main indices
        if len(res_true) > 0:
            # at least one header centroid lies inside this band: larger total area wins
            sum_header = np.sum(areas_header[res_true])
            sum_main = np.sum(areas_main[res_true_main])
            if sum_main > sum_header:
                cnt_int = [contours_header[j] for j in res_true]
                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(1, 1, 1))
            else:
                cnt_int = [contours_main[j] for j in res_true_main]
                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(2, 2, 2))
    # pass 2: same comparison, this time over the vertical band of every header contour
    for jj in range(len(contours_header)):
        main_in_header = [(cy_main > y_min_header[jj]) & ((cy_main < y_max_header[jj]))]
        header_in_header = [(cy_header > y_min_header[jj]) & ((cy_header < y_max_header[jj]))]
        args_log = args_main * main_in_header
        res = args_log[args_log > 0]
        res_true = res - 1  # 0-based main indices inside this header band
        args_log_header = args * header_in_header
        res_header = args_log_header[args_log_header > 0]
        res_true_header = res_header - 1  # 0-based header indices inside this header band
        if len(res_true) > 0:
            sum_header = np.sum(areas_header[res_true_header])
            sum_main = np.sum(areas_main[res_true])
            if sum_main > sum_header:
                cnt_int = [contours_header[j] for j in res_true_header]
                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(1, 1, 1))
            else:
                cnt_int = [contours_main[j] for j in res_true]
                text_regions = cv2.fillPoly(text_regions, pts=cnt_int, color=(2, 2, 2))
    return text_regions
 def return_hor_spliter_by_index(peaks_neg_fin_t, x_min_hor_some, x_max_hor_some):
    """Map horizontal lines onto column-separator peaks and drop the peaks they cross.

    The lines are processed in order of increasing left edge. For each line
    the first peak lying less than 360 px to the left of its left edge and
    the first peak lying less than 360 px to the right of (or exactly on)
    its right edge are looked up; every peak strictly between those two is
    marked for deletion.

    peaks_neg_fin_t: NumPy array of peak x positions.
    x_min_hor_some: array-like of left x coordinates, one per line.
    x_max_hor_some: NumPy array of right x coordinates, one per line.

    Returns a 5-tuple:
        indexer_lines: per line (sorted order) the index of the left peak,
            reduced by the number of already deleted peaks below it, or
            -999 when no left/right peak was found.
        peaks_true: the peaks that were not marked for deletion.
        arg_min_hor_sort: permutation that sorts the lines by left edge.
        indexer_lines_deletions_len: per line the number of peaks it
            crosses (0 if none, -999 when no left/right peak was found).
        indexr_uniq_ind: per line ``[deletions]`` (the crossed peak indices
            wrapped in a list) or -999.
    """
    arg_min_hor_sort = np.argsort(x_min_hor_some)
    x_min_hor_some_sort = np.sort(x_min_hor_some)
    x_max_hor_some_sort = x_max_hor_some[arg_min_hor_sort]
    arg_minmax = np.arange(len(peaks_neg_fin_t))
    indexer_lines = []
    indexes_to_delete = []
    indexer_lines_deletions_len = []
    indexr_uniq_ind = []
    for x_left, x_right in zip(x_min_hor_some_sort, x_max_hor_some_sort):
        min_h = peaks_neg_fin_t - x_left
        max_h = peaks_neg_fin_t - x_right
        min_h_neg = arg_minmax[(min_h < 0) & (np.abs(min_h) < 360)]
        max_h_neg = arg_minmax[(max_h >= 0) & (np.abs(max_h) < 360)]
        if len(min_h_neg) == 0 or len(max_h_neg) == 0:
            # the line cannot be anchored between two peaks
            indexer_lines.append(-999)
            indexer_lines_deletions_len.append(-999)
            indexr_uniq_ind.append(-999)
            continue
        left_peak = min_h_neg[0]
        deletions = list(range(left_peak + 1, max_h_neg[0]))
        unique_delets_int = []
        if deletions:
            indexes_to_delete.extend(deletions)
            unique_delets = np.unique(indexes_to_delete)
            # deleted peaks below the left peak shift its index downwards
            unique_delets_int = unique_delets[unique_delets < left_peak]
            indexer_lines_deletions_len.append(len(deletions))
            indexr_uniq_ind.append([deletions])
        else:
            indexer_lines_deletions_len.append(0)
            indexr_uniq_ind.append(-999)
        index_line_true = left_peak - len(unique_delets_int)
        # the shifted index is only trusted when it stays positive and the
        # left peak is at least the third one; otherwise keep the raw index
        if not (index_line_true > 0 and left_peak >= 2):
            index_line_true = left_peak
        indexer_lines.append(index_line_true)
    deleted = set(indexes_to_delete)
    peaks_true = [peak for m, peak in enumerate(peaks_neg_fin_t) if m not in deleted]
    return indexer_lines, peaks_true, arg_min_hor_sort, indexer_lines_deletions_len, indexr_uniq_ind
 def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier):
    #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2)
    img_p_in_ver=img_p_in_ver.astype(np.uint8)
--- a/sbb_newspapers_org_image/utils/contour.py
+++ b/sbb_newspapers_org_image/utils/contour.py
@ -26,39 +26,6 @@ def find_contours_mean_y_diff(contours_main):
    cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    return np.mean(np.diff(np.sort(np.array(cy_main))))
 def find_features_of_contours(contours_main):
    """Return the vertical extent and the area of every contour.

    contours_main: sequence of contours indexed as ``contour[:, 0, 1]`` for
        the y coordinates (the layout produced by ``cv2.findContours``).

    Returns ``(y_min_main, y_max_main, areas_main)``, three NumPy arrays with
    one entry per contour. Only these values are computed; moments, centroids
    and x extents are not part of the result.
    """
    areas_main = np.array([cv2.contourArea(contour) for contour in contours_main])
    y_min_main = np.array([np.min(contour[:, 0, 1]) for contour in contours_main])
    y_max_main = np.array([np.max(contour[:, 0, 1]) for contour in contours_main])
    return y_min_main, y_max_main, areas_main
 def return_contours_of_interested_region_and_bounding_box(region_pre_p, pixel):
    """Find the contours of one label in a prediction map, with their bounding boxes.

    region_pre_p: array whose first channel (``[:, :, 0]``) holds the labels.
    pixel: label value to extract (the original note says image regions use 5).

    Returns ``(contours, boxes)`` where every box is ``[x, y, w, h]`` as
    plain ints.
    """
    # 0/1 mask of the requested label, replicated to three channels so that
    # the BGR->gray conversion can be applied
    label_mask = ((region_pre_p[:, :, 0] == pixel) * 1).astype(np.uint8)
    label_mask_3ch = np.repeat(label_mask[:, :, np.newaxis], 3, axis=2)
    gray = cv2.cvtColor(label_mask_3ch, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, 0)
    contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours_imgs = return_parent_contours(contours_imgs, hiearchy)
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=0.0003)
    boxes = [[int(value) for value in cv2.boundingRect(contour)] for contour in contours_imgs]
    return contours_imgs, boxes
 def get_text_region_boxes_by_given_contours(contours):
@ -76,7 +43,6 @@ def get_text_region_boxes_by_given_contours(contours):
 def filter_contours_area_of_image(image, contours, hirarchy, max_area, min_area):
    found_polygons_early = list()
    jv = 0
    for c in contours:
        if len(c) < 3:  # A polygon cannot have less than 3 points
@ -89,23 +55,6 @@ def filter_contours_area_of_image(image, contours, hirarchy, max_area, min_area)
        jv += 1
    return found_polygons_early
 def filter_contours_area_of_image_interiors(image, contours, hirarchy, max_area, min_area):
    """Keep the interior (child) contours whose area lies within a relative range.

    image: array; only ``image.shape[:2]`` is used, as the reference area.
    contours: contours as returned by ``cv2.findContours``.
    hirarchy: matching hierarchy; ``hirarchy[0][i][3]`` is read as the parent
        index of contour ``i`` and ``-1`` means "no parent".
    max_area, min_area: bounds as fractions of the image area (inclusive).

    Returns a list of unsigned-integer arrays with the exterior coordinates
    of the accepted polygons.
    """
    found_polygons_early = list()
    image_area = np.prod(image.shape[:2])
    # enumerate keeps the hierarchy index aligned with the contour even when
    # degenerate contours are skipped
    for jv, c in enumerate(contours):
        if len(c) < 3:  # A polygon cannot have less than 3 points
            continue
        polygon = geometry.Polygon([point[0] for point in c])
        area = polygon.area
        if area >= min_area * image_area and area <= max_area * image_area and hirarchy[0][jv][3] != -1:
            found_polygons_early.append(np.array(list(polygon.exterior.coords), dtype=np.uint))
    return found_polygons_early
 def filter_contours_area_of_image_tables(image, contours, hirarchy, max_area, min_area):
    found_polygons_early = list()
@ -236,15 +185,6 @@ def return_contours_of_interested_textline(region_pre_p, pixel):
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=0.000000003)
    return contours_imgs
 def return_bonding_box_of_contours(cnts):
    """Return ``[x, y, w, h]`` (from ``cv2.boundingRect``) for every contour in *cnts*."""
    return [list(cv2.boundingRect(cnts[idx])) for idx in range(len(cnts))]
 def return_contours_of_image(image):
    if len(image.shape) == 2:
--- a/sbb_newspapers_org_image/utils/drop_capitals.py
+++ b/sbb_newspapers_org_image/utils/drop_capitals.py
--- a/sbb_newspapers_org_image/utils/is_nan.py
+++ b/sbb_newspapers_org_image/utils/is_nan.py
--- a/sbb_newspapers_org_image/utils/marginals.py
+++ b/sbb_newspapers_org_image/utils/marginals.py
--- a/qurator/eynollah/utils/pil_cv2.py
+++ b/qurator/eynollah/utils/pil_cv2.py
@ -0,0 +1,24 @@
 from PIL import Image
 import numpy as np
 from ocrd_models import OcrdExif
 from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor, imread
 # from sbb_binarization
 def cv2pil(img):
    """Convert an image array to a PIL image after casting it to ``uint8``."""
    as_uint8 = img.astype('uint8')
    return Image.fromarray(as_uint8)
 def pil2cv(img):
    """Convert a PIL image to a 3-channel BGR array (logic taken from ocrd/workspace.py)."""
    # bilevel ('1') and greyscale ('L') images are expanded from one channel,
    # every other mode is treated as RGB
    if img.mode in ('1', 'L'):
        color_conversion = COLOR_GRAY2BGR
    else:
        color_conversion = COLOR_RGB2BGR
    pil_as_np_array = np.array(img)
    if img.mode == '1':
        # bilevel images come out as booleans; cast so cvtColor accepts them
        pil_as_np_array = pil_as_np_array.astype('uint8')
    return cvtColor(pil_as_np_array, color_conversion)
 def check_dpi(image_filename):
    """Return the pixel density recorded in an image file, in dots per inch.

    image_filename: path of the image; it is opened with PIL only to read
        its metadata through ``OcrdExif`` and closed again afterwards.

    Returns the resolution truncated to an ``int``. A resolution stored in
    pixels per centimetre is converted to pixels per inch.
    """
    # the context manager releases the file handle once the metadata is read
    with Image.open(image_filename) as pil_image:
        exif = OcrdExif(pil_image)
        resolution = exif.resolution
        if exif.resolutionUnit == 'cm':
            # px/cm -> px/inch: one inch is 2.54 cm, so the value grows
            resolution *= 2.54
    return int(resolution)
--- a/sbb_newspapers_org_image/utils/resize.py
+++ b/sbb_newspapers_org_image/utils/resize.py
--- a/sbb_newspapers_org_image/utils/rotate.py
+++ b/sbb_newspapers_org_image/utils/rotate.py
--- a/sbb_newspapers_org_image/utils/separate_lines.py
+++ b/sbb_newspapers_org_image/utils/separate_lines.py
@ -1,4 +1,3 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import cv2
 from scipy.signal import find_peaks
@ -14,31 +13,8 @@ from .contour import (
 )
 from .is_nan import isNaN
 from . import (
    boosting_headers_by_longshot_region_segmentation,
    crop_image_inside_box,
    find_features_of_lines,
    find_num_col,
    find_num_col_by_vertical_lines,
    find_num_col_deskew,
    find_num_col_only_image,
    isNaN,
    otsu_copy,
    otsu_copy_binary,
    return_hor_spliter_by_index_for_without_verticals,
    delete_seperator_around,
    return_regions_without_seperators,
    put_drop_out_from_only_drop_model,
    putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
    check_any_text_region_in_model_one_is_main_or_header,
    small_textlines_to_parent_adherence2,
    order_and_id_of_texts,
    order_of_regions,
    implent_law_head_main_not_parallel,
    return_hor_spliter_by_index,
    combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new,
    return_points_with_boundies,
    find_number_of_columns_in_document,
    return_boxes_of_images_by_order_of_reading_new,
 )
 def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@ -1395,7 +1371,7 @@ def seperate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
    return None, cont_final
-def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, slope_first, add_boxes_coor_into_textlines=False):
+def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
    textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
    textline_mask = textline_mask.astype(np.uint8)
@ -1485,7 +1461,7 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
    return contours_rotated_clean
-def seperate_lines_new2(img_path, thetha, num_col, slope_region, dir_of_all, f_name):
+def seperate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None):
    if num_col == 1:
        num_patches = int(img_path.shape[1] / 200.0)
@ -1536,7 +1512,7 @@ def seperate_lines_new2(img_path, thetha, num_col, slope_region, dir_of_all, f_n
        sigma = 2
        try:
-            slope_xline = return_deskew_slop(img_xline, sigma, dir_of_all=dir_of_all, f_name=f_name)
+            slope_xline = return_deskew_slop(img_xline, sigma, plotter=plotter)
        except:
            slope_xline = 0
@ -1593,29 +1569,10 @@ def seperate_lines_new2(img_path, thetha, num_col, slope_region, dir_of_all, f_n
    # plt.show()
    return img_patch_ineterst_revised
-def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=None, f_name=None):
+def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
-
+    if main_page and plotter:
-    if main_page and dir_of_all is not None:
+        plotter.save_plot_of_textline_density(img_patch_org)
        plt.figure(figsize=(80,40))
        plt.rcParams['font.size']='50'
        plt.subplot(1,2,1)
        plt.imshow(img_patch_org)
        plt.subplot(1,2,2)
        plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8)
        plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60)
        plt.ylabel('Height',fontsize=60)
        plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
        plt.gca().invert_yaxis()
        plt.savefig(os.path.join(dir_of_all, f_name+'_density_of_textline.png'))
    #print(np.max(img_patch_org.sum(axis=0)) ,np.max(img_patch_org.sum(axis=1)),'axislar')
    #img_patch_org=resize_image(img_patch_org,int(img_patch_org.shape[0]*2.5),int(img_patch_org.shape[1]/2.5))
    #print(np.max(img_patch_org.sum(axis=0)) ,np.max(img_patch_org.sum(axis=1)),'axislar2')
    img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
    img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
@ -1647,53 +1604,23 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
        #plt.show()
        angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45])
        #res=[]
        #num_of_peaks=[]
        #index_cor=[]
        var_res=[]
        #indexer=0
        for rot in angels:
            img_rot=rotate_image(img_resized,rot)
            #plt.imshow(img_rot)
            #plt.show()
            img_rot[img_rot!=0]=1
            #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0  ))
            #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3  )
            #print(var_spectrum,'var_spectrum')
            try:
                var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3  )
                ##print(rot,var_spectrum,'var_spectrum')
                #res_me=np.mean(neg_peaks)
                #if res_me==0:
                    #res_me=1000000000000000000000
                #else:
                    #pass
                #res_num=len(neg_peaks)
            except:
                #res_me=1000000000000000000000
                #res_num=0
                var_spectrum=0
            #if self.isNaN(res_me):
                #pass
            #else:
                #res.append( res_me )
                #var_res.append(var_spectrum)
                #num_of_peaks.append( res_num )
                #index_cor.append(indexer)
            #indexer=indexer+1
            var_res.append(var_spectrum)
            #index_cor.append(indexer)
            #indexer=indexer+1
        try:
            var_res=np.array(var_res)
            ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
        except:
            ang_int=0
@ -1701,32 +1628,19 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
        angels=np.linspace(ang_int-22.5,ang_int+22.5,100)
        #res=[]
        #num_of_peaks=[]
        #index_cor=[]
        var_res=[]
        for rot in angels:
            img_rot=rotate_image(img_resized,rot)
            ##plt.imshow(img_rot)
            ##plt.show()
            img_rot[img_rot!=0]=1
            #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0  ))
            try:
                var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3  )
            except:
                var_spectrum=0
            var_res.append(var_spectrum)
        try:
            var_res=np.array(var_res)
            ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
        except:
            ang_int=0
@ -1745,9 +1659,6 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
            #plt.imshow(img_rot)
            #plt.show()
            img_rot[img_rot!=0]=1
            #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0  ))
            #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3  )
            #print(var_spectrum,'var_spectrum')
            try:
@ -1759,51 +1670,30 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
            var_res.append(var_spectrum)
-        if dir_of_all is not None:
+        if plotter:
-            #print('galdi?')
+            plotter.save_plot_of_rotation_angle(angels, var_res)
            plt.figure(figsize=(60,30))
            plt.rcParams['font.size']='50'
            plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4)
            plt.xlabel('angle',fontsize=50)
            plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50)
            plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))]  ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$')
            plt.legend(loc='best')
            plt.savefig(os.path.join(dir_of_all,f_name+'_rotation_angle.png'))
        try:
            var_res=np.array(var_res)
            ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
        except:
            ang_int=0
        early_slope_edge=11
        if abs(ang_int)>early_slope_edge and ang_int<0:
            angels=np.linspace(-90,-12,100)
            var_res=[]
            for rot in angels:
                img_rot=rotate_image(img_resized,rot)
                ##plt.imshow(img_rot)
                ##plt.show()
                img_rot[img_rot!=0]=1
                #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0  ))
                try:
                    var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3  )
                except:
                    var_spectrum=0
                var_res.append(var_spectrum)
            try:
                var_res=np.array(var_res)
                ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
            except:
                ang_int=0
@ -1811,67 +1701,47 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
        elif abs(ang_int)>early_slope_edge and ang_int>0:
            angels=np.linspace(90,12,100)
            var_res=[]
            for rot in angels:
                img_rot=rotate_image(img_resized,rot)
                ##plt.imshow(img_rot)
                ##plt.show()
                img_rot[img_rot!=0]=1
                #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0  ))
                try:
                    var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3  )
                    #print(indexer,'indexer')
                except:
                    var_spectrum=0
                var_res.append(var_spectrum)
            try:
                var_res=np.array(var_res)
                ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
            except:
                ang_int=0
    else:
        angels=np.linspace(-25,25,60)
        var_res=[]
        indexer=0
        for rot in angels:
            img_rot=rotate_image(img_resized,rot)
            #plt.imshow(img_rot)
            #plt.show()
            img_rot[img_rot!=0]=1
            #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0  ))
            #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3  )
            #print(var_spectrum,'var_spectrum')
            try:
                var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3  )
            except:
                var_spectrum=0
            var_res.append(var_spectrum)
        try:
            var_res=np.array(var_res)
            ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
        except:
            ang_int=0
        #plt.plot(var_res)
        #plt.show()
        ##plt.plot(mom3_res)
        ##plt.show()
        #print(ang_int,'ang_int111')
@ -1888,20 +1758,14 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
                ##plt.imshow(img_rot)
                ##plt.show()
                img_rot[img_rot!=0]=1
                #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0  ))
                try:
                    var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3  )
                except:
                    var_spectrum=0
                var_res.append(var_spectrum)
            try:
                var_res=np.array(var_res)
                ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
            except:
                ang_int=0
@ -1918,7 +1782,6 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
                ##plt.imshow(img_rot)
                ##plt.show()
                img_rot[img_rot!=0]=1
                #res_me=np.mean(self.find_num_col_deskew(img_rot,sigma_des,2.0  ))
                try:
                    var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3  )
                    #print(indexer,'indexer')
@ -1926,12 +1789,8 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, dir_of_all=Non
                    var_spectrum=0
                var_res.append(var_spectrum)
            try:
                var_res=np.array(var_res)
                ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
            except:
                ang_int=0
--- a/qurator/eynollah/utils/xml.py
+++ b/qurator/eynollah/utils/xml.py
@ -0,0 +1,62 @@
 from lxml import etree as ET
 # Prefix -> namespace URI map, passed as `nsmap` when the PcGts root element is built.
 NAMESPACES = {}
 NAMESPACES['page'] = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"
 NAMESPACES['xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
 # the None key registers the PAGE namespace as the default (un-prefixed) one
 NAMESPACES[None] = NAMESPACES['page']
 def create_page_xml(imageFilename, height, width):
    """Build the skeleton of a PAGE-XML document.

    Creates the ``PcGts`` root with a ``Metadata`` child (Creator, Created,
    LastChange) and a ``Page`` child describing the image.

    Returns ``(pcgts, page)``: the root element and its ``Page`` element.
    """
    root = ET.Element("PcGts", nsmap=NAMESPACES)
    root.set("{%s}schemaLocation" % NAMESPACES['xsi'], NAMESPACES['page'])
    meta = ET.SubElement(root, "Metadata")
    # NOTE(review): both timestamps are fixed literals, not the current time
    for tag, text in (
        ("Creator", "SBB_QURATOR"),
        ("Created", "2019-06-17T18:15:12"),
        ("LastChange", "2019-06-17T18:15:12"),
    ):
        ET.SubElement(meta, tag).text = text
    page_element = ET.SubElement(root, "Page")
    for attribute, value in (
        ("imageFilename", imageFilename),
        ("imageHeight", str(height)),
        ("imageWidth", str(width)),
        ("type", "content"),
        ("readingDirection", "left-to-right"),
        ("textLineOrder", "top-to-bottom"),
    ):
        page_element.set(attribute, value)
    return root, page_element
 def add_textequiv(parent, text=''):
    """Append ``<TextEquiv><Unicode>text</Unicode></TextEquiv>`` to *parent*."""
    wrapper = ET.SubElement(parent, 'TextEquiv')
    ET.SubElement(wrapper, 'Unicode').text = text
 def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals):
    """
    Append a ReadingOrder/OrderedGroup element to *page*.

    One RegionRefIndexed entry is written per item of order_of_texts (its
    regionRef is looked up in id_of_texts), followed by one entry per
    marginal polygon whose regionRef is 'r<running index>'.

    XXX side-effect: extends id_of_marginalia (the same list is returned)
    """
    reading_order = ET.SubElement(page, 'ReadingOrder')
    group = ET.SubElement(reading_order, 'OrderedGroup')
    group.set('id', "ro357564684568544579089")

    def append_ref(position, region_id):
        # one <RegionRefIndexed index=... regionRef=.../> entry
        ref = ET.SubElement(group, 'RegionRefIndexed')
        ref.set('index', str(position))
        ref.set('regionRef', region_id)

    position = 0
    for text_idx in order_of_texts:
        append_ref(position, id_of_texts[text_idx])
        position += 1
    # marginals continue the numbering right after the text regions
    for _ in range(len(found_polygons_marginals)):
        marginal_id = 'r%s' % position
        id_of_marginalia.append(marginal_id)
        append_ref(position, marginal_id)
        position += 1
    return id_of_marginalia
--- a/qurator/eynollah/writer.py
+++ b/qurator/eynollah/writer.py
@ -0,0 +1,272 @@
 # pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
 from pathlib import Path
 import os.path
 from .utils.xml import create_page_xml, add_textequiv, xml_reading_order
 from ocrd_utils import getLogger
 from lxml import etree as ET
 import numpy as np
 class EynollahXmlWriter():
    def __init__(self, *, dir_out, image_filename, curved_line):
        """
        Remember where and how the PAGE-XML for one image is written.

        Args:
            dir_out: directory the XML file is written into.
            image_filename: path of the image; its stem names the output file.
            curved_line: switches the coordinate handling used when serializing text lines.
        """
        self.logger = getLogger('eynollah.writer')
        self.dir_out = dir_out
        self.image_filename = image_filename
        basename = Path(image_filename).name
        self.image_filename_stem = Path(basename).stem
        self.curved_line = curved_line
        # XXX set outside __init__: callers must assign these before building XML
        self.scale_x = self.scale_y = None
        self.height_org = self.width_org = None
    def calculate_page_coords(self, cont_page):
        self.logger.debug('enter calculate_page_coords')
        points_page_print = ""
        for _, contour in enumerate(cont_page[0]):
            if len(contour) == 2:
                points_page_print += str(int((contour[0]) / self.scale_x))
                points_page_print += ','
                points_page_print += str(int((contour[1]) / self.scale_y))
            else:
                points_page_print += str(int((contour[0][0]) / self.scale_x))
                points_page_print += ','
                points_page_print += str(int((contour[0][1] ) / self.scale_y))
            points_page_print = points_page_print + ' '
        return points_page_print[:-1]
    def serialize_lines_in_marginal(self, marginal, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l):
        """
        Append one ``TextLine`` (with ``Coords`` and an empty ``TextEquiv``) to
        ``marginal`` for every line polygon of the marginal at ``marginal_idx``.

        Coordinate handling per point:

        * not ``self.curved_line``: box offset and page offset are added;
        * ``self.curved_line`` and ``|slope| <= 45``: only the page offset is added;
        * ``self.curved_line`` and ``|slope| > 45``: box offset and page offset are added.

        x uses index 2 and y uses index 0 of both the box and ``page_coord``
        (presumably left/top offsets -- confirm against the caller).

        Returns:
            The next unused text line counter (``id_indexer_l`` advanced by the
            number of lines written).
        """
        for polygon in all_found_texline_polygons_marginals[marginal_idx]:
            textline = ET.SubElement(marginal, 'TextLine')
            textline.set('id', 'l%s' % id_indexer_l)
            id_indexer_l += 1
            coord = ET.SubElement(textline, 'Coords')
            add_textequiv(textline)

            serialized = []
            for point in polygon:
                if not self.curved_line:
                    add_box = True
                else:
                    steepness = np.abs(slopes_marginals[marginal_idx])
                    if steepness <= 45:
                        add_box = False
                    elif steepness > 45:
                        add_box = True
                    else:
                        # Neither comparison holds: nothing is written for
                        # this point, but it still takes a slot between separators.
                        serialized.append('')
                        continue
                # A point is a flat (x, y) pair, or a wrapper with the pair at index 0.
                if len(point) == 2:
                    px, py = point[0], point[1]
                else:
                    px, py = point[0][0], point[0][1]
                if add_box:
                    box = all_box_coord_marginals[marginal_idx]
                    px = px + box[2]
                    py = py + box[0]
                x = int((px + page_coord[2]) / self.scale_x)
                y = int((py + page_coord[0]) / self.scale_y)
                serialized.append('%s,%s' % (x, y))
            coord.set('points', ' '.join(serialized))
        return id_indexer_l
    def serialize_lines_in_region(self, textregion, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, id_indexer_l):
        """
        Append one ``TextLine`` (with ``Coords`` and an empty ``TextEquiv``) to
        ``textregion`` for every line polygon of the region at ``region_idx``.

        Coordinate handling per point:

        * not ``self.curved_line``: box offset and page offset are added and the
          scaled result is clamped to be non-negative;
        * ``self.curved_line`` and ``|slope| <= 45``: only the page offset is added;
        * ``self.curved_line`` and ``|slope| > 45``: box offset and page offset are added.

        Only the first case clamps. x uses index 2 and y uses index 0 of both
        the box and ``page_coord`` (presumably left/top offsets -- confirm
        against the caller).

        Returns:
            The next unused text line counter (``id_indexer_l`` advanced by the
            number of lines written).
        """
        self.logger.debug('enter serialize_lines_in_region')
        for polygon in all_found_texline_polygons[region_idx]:
            textline = ET.SubElement(textregion, 'TextLine')
            textline.set('id', 'l%s' % id_indexer_l)
            id_indexer_l += 1
            coord = ET.SubElement(textline, 'Coords')
            add_textequiv(textline)

            serialized = []
            for point in polygon:
                if not self.curved_line:
                    add_box, clamp = True, True
                else:
                    steepness = np.abs(slopes[region_idx])
                    if steepness <= 45:
                        add_box, clamp = False, False
                    elif steepness > 45:
                        add_box, clamp = True, False
                    else:
                        # Neither comparison holds: nothing is written for
                        # this point, but it still takes a slot between separators.
                        serialized.append('')
                        continue
                # A point is a flat (x, y) pair, or a wrapper with the pair at index 0.
                if len(point) == 2:
                    px, py = point[0], point[1]
                else:
                    px, py = point[0][0], point[0][1]
                if add_box:
                    box = all_box_coord[region_idx]
                    px = px + box[2]
                    py = py + box[0]
                x = int((px + page_coord[2]) / self.scale_x)
                y = int((py + page_coord[0]) / self.scale_y)
                if clamp:
                    x, y = max(0, x), max(0, y)
                serialized.append('%s,%s' % (x, y))
            coord.set('points', ' '.join(serialized))
        return id_indexer_l
    def write_pagexml(self, pcgts):
        self.logger.info("filename stem: '%s'", self.image_filename_stem)
        tree = ET.ElementTree(pcgts)
        tree.write(os.path.join(self.dir_out, self.image_filename_stem) + ".xml")
    def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page):
        """
        Build the PAGE-XML tree for the layout without headers / drop capitals / tables.

        Written in this order below ``Page``: ``Border``, ``ReadingOrder`` (only
        when there is at least one text region), paragraph ``TextRegion``s with
        their text lines, marginalia ``TextRegion``s with their text lines, and
        ``ImageRegion``s.

        Region ids: paragraphs are numbered ``r0..``; marginal ids come from
        ``xml_reading_order``; image ids start at
        ``len(text regions) + len(marginals)``.

        Returns:
            The root ``PcGts`` element.
        """
        self.logger.debug('enter build_pagexml_no_full_layout')

        # create the file structure
        pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
        page_print_sub = ET.SubElement(page, "Border")
        coord_page = ET.SubElement(page_print_sub, "Coords")
        coord_page.set('points', self.calculate_page_coords(cont_page))

        id_of_marginalia = []
        id_indexer = 0
        id_indexer_l = 0
        # len() rather than truthiness: the polygon containers may not support bool().
        if len(found_polygons_text_region) > 0:
            id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
            for mm in range(len(found_polygons_text_region)):
                textregion = ET.SubElement(page, 'TextRegion')
                textregion.set('id', 'r%s' % id_indexer)
                id_indexer += 1
                textregion.set('type', 'paragraph')
                coord_text = ET.SubElement(textregion, 'Coords')
                coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
                id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l)
                add_textequiv(textregion)

        # NOTE(review): marginal ids are only generated when text regions exist,
        # so marginals without any text region would fail on the lookup below --
        # confirm whether that combination can occur.
        for mm in range(len(found_polygons_marginals)):
            marginal = ET.SubElement(page, 'TextRegion')
            marginal.set('id', id_of_marginalia[mm])
            marginal.set('type', 'marginalia')
            coord_text = ET.SubElement(marginal, 'Coords')
            coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
            id_indexer_l = self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l)

        # Image ids continue after the text regions and marginals.
        id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals)
        for mm in range(len(found_polygons_text_region_img)):
            textregion = ET.SubElement(page, 'ImageRegion')
            textregion.set('id', 'r%s' % id_indexer)
            id_indexer += 1
            coord_text = ET.SubElement(textregion, 'Coords')
            # Same offset/scale serialization as every other region, so use the
            # shared helper (as the full-layout builder does) instead of a copy.
            coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord))
        return pcgts
    def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page):
        """
        Build the PAGE-XML tree for the full layout.

        Written in this order below ``Page``: ``Border``, ``ReadingOrder`` (only
        when there is at least one text region), paragraph, header and
        drop-capital ``TextRegion``s, marginalia ``TextRegion``s,
        ``ImageRegion``s and ``TableRegion``s. Paragraphs, headers and
        marginalia also get their text lines; every ``TextRegion`` ends with an
        empty ``TextEquiv``.

        Region ids: paragraphs and headers are numbered ``r0..`` consecutively;
        marginal ids come from ``xml_reading_order``; drop capitals start at
        ``len(text) + len(headers) + len(marginals)``; images and tables
        continue after the drop capitals.

        Returns:
            The root ``PcGts`` element.
        """
        self.logger.debug('enter build_pagexml_full_layout')

        # create the file structure
        pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
        page_print_sub = ET.SubElement(page, "Border")
        coord_page = ET.SubElement(page_print_sub, "Coords")
        coord_page.set('points', self.calculate_page_coords(cont_page))

        id_indexer = 0
        id_indexer_l = 0
        id_of_marginalia = []

        # len() rather than truthiness: the polygon containers may not support bool().
        if len(found_polygons_text_region) > 0:
            id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
            for mm in range(len(found_polygons_text_region)):
                textregion = ET.SubElement(page, 'TextRegion')
                textregion.set('id', 'r%s' % id_indexer)
                id_indexer += 1
                textregion.set('type', 'paragraph')
                coord_text = ET.SubElement(textregion, 'Coords')
                coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
                id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l)
                add_textequiv(textregion)

        self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
        for mm in range(len(found_polygons_text_region_h)):
            textregion = ET.SubElement(page, 'TextRegion')
            textregion.set('id', 'r%s' % id_indexer)
            id_indexer += 1
            textregion.set('type', 'header')
            coord_text = ET.SubElement(textregion, 'Coords')
            coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h, mm, page_coord))
            # NOTE(review): header lines are serialized with the paragraph
            # ``slopes`` indexed by the header index -- confirm this is intended.
            id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, id_indexer_l)
            add_textequiv(textregion)

        if len(found_polygons_drop_capitals) > 0:
            # Skip past the ids reserved for paragraphs, headers and marginals.
            id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals)
            for mm in range(len(found_polygons_drop_capitals)):
                textregion = ET.SubElement(page, 'TextRegion')
                # No leading blank in the id: ' r<N>' is not a valid id and
                # does not match the 'r<N>' form used by every other region.
                textregion.set('id', 'r%s' % id_indexer)
                id_indexer += 1
                textregion.set('type', 'drop-capital')
                coord_text = ET.SubElement(textregion, 'Coords')
                coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
                add_textequiv(textregion)

        # NOTE(review): marginal ids are only generated when text regions exist,
        # so marginals without any text region would fail on the lookup below --
        # confirm whether that combination can occur.
        for mm in range(len(found_polygons_marginals)):
            marginal = ET.SubElement(page, 'TextRegion')
            marginal.set('id', id_of_marginalia[mm])
            marginal.set('type', 'marginalia')
            coord_text = ET.SubElement(marginal, 'Coords')
            coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
            id_indexer_l = self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l)
            # The TextEquiv belongs to this marginal (not to whichever region
            # was created last) and goes after its text lines, as for the
            # other text regions.
            add_textequiv(marginal)

        id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals)
        for mm in range(len(found_polygons_text_region_img)):
            textregion = ET.SubElement(page, 'ImageRegion')
            textregion.set('id', 'r%s' % id_indexer)
            id_indexer += 1
            coord_text = ET.SubElement(textregion, 'Coords')
            coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord))

        for mm in range(len(found_polygons_tables)):
            textregion = ET.SubElement(page, 'TableRegion')
            textregion.set('id', 'r%s' % id_indexer)
            id_indexer += 1
            coord_text = ET.SubElement(textregion, 'Coords')
            coord_text.set('points', self.calculate_polygon_coords(found_polygons_tables, mm, page_coord))
        return pcgts
    def calculate_polygon_coords(self, contour_list, i, page_coord):
        self.logger.debug('enter calculate_polygon_coords')
        coords = ''
        for j in range(len(contour_list[i])):
            if len(contour_list[i][j]) == 2:
                coords += str(int((contour_list[i][j][0] + page_coord[2]) / self.scale_x))
                coords += ','
                coords += str(int((contour_list[i][j][1] + page_coord[0]) / self.scale_y))
            else:
                coords += str(int((contour_list[i][j][0][0] + page_coord[2]) / self.scale_x))
                coords += ','
                coords += str(int((contour_list[i][j][0][1] + page_coord[0]) / self.scale_y))
            if j < len(contour_list[i]) - 1:
                coords=coords + ' '
        return coords
--- a/sbb_newspapers_org_image/eynollah.py
+++ b/sbb_newspapers_org_image/eynollah.py
--- a/sbb_newspapers_org_image/unused.py
+++ b/sbb_newspapers_org_image/unused.py
--- a/sbb_newspapers_org_image/utils/xml.py
+++ b/sbb_newspapers_org_image/utils/xml.py
@ -1,34 +0,0 @@
 from lxml import etree as ET
 # XML namespace map handed to lxml as ``nsmap`` when the PcGts root is created.
 NAMESPACES = {}
 NAMESPACES['page'] = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"
 NAMESPACES['xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
 # The ``None`` key makes the PAGE namespace the default (unprefixed) one.
 NAMESPACES[None] = NAMESPACES['page']
 def create_page_xml(imageFilename, height, width):
    """
    Build the skeleton of a PAGE-XML document for one image.

    The root ``PcGts`` element gets a ``Metadata`` child (creator plus
    creation / last-change timestamps) and a ``Page`` child describing the image.

    Args:
        imageFilename: value stored in the ``imageFilename`` attribute of ``Page``.
        height: image height, stringified into ``imageHeight``.
        width: image width, stringified into ``imageWidth``.

    Returns:
        Tuple ``(pcgts, page)``: the root element and its ``Page`` child.
    """
    root = ET.Element("PcGts", nsmap=NAMESPACES)
    root.set("{%s}schemaLocation" % NAMESPACES['xsi'], NAMESPACES['page'])

    metadata = ET.SubElement(root, "Metadata")
    # NOTE: both timestamps are fixed constants, not the time of the run.
    for tag, value in (
        ("Creator", "SBB_QURATOR"),
        ("Created", "2019-06-17T18:15:12"),
        ("LastChange", "2019-06-17T18:15:12"),
    ):
        ET.SubElement(metadata, tag).text = value

    page = ET.SubElement(root, "Page")
    for attribute, value in (
        ("imageFilename", imageFilename),
        ("imageHeight", str(height)),
        ("imageWidth", str(width)),
        ("type", "content"),
        ("readingDirection", "left-to-right"),
        ("textLineOrder", "top-to-bottom"),
    ):
        page.set(attribute, value)
    return root, page
--- a/setup.py
+++ b/setup.py
@ -10,12 +10,13 @@ setup(
    author='Vahid Rezanezhad',
    url='https://github.com/qurator-spk/eynollah',
    license='Apache License 2.0',
-    packages=find_packages(),
+    namespace_packages=['qurator'],
    packages=find_packages(exclude=['tests']),
    install_requires=install_requires,
    entry_points={
        'console_scripts': [
-            'eynollah=sbb_newspapers_org_image.cli:main',
+            'eynollah=qurator.eynollah.cli:main',
-            # 'ocrd-eynollah=eynollah.ocrd_cli:cli',
+            # 'ocrd-eynollah=qurator.eynollah.ocrd_cli:cli',
        ]
    },
 )
--- a/tests/init.py
+++ b/tests/init.py
--- a/tests/base.py
+++ b/tests/base.py
@ -0,0 +1,54 @@
 # pylint: disable=unused-import
 from os.path import dirname, realpath
 from os import chdir
 import sys
 import logging
 import io
 import collections
 from unittest import TestCase as VanillaTestCase, skip, main as unittests_main
 import pytest
 from ocrd_utils import disableLogging, initLogging
 def main(fn=None):
    """
    Run tests from a script entry point.

    With ``fn`` given, run pytest on that file and exit with its return code;
    otherwise hand over to the ``unittest`` main program.
    """
    if not fn:
        unittests_main()
        return
    sys.exit(pytest.main([fn]))
 class TestCase(VanillaTestCase):
    """
    Base test case: runs from the directory above this file and resets
    logging before every test.
    """

    @classmethod
    def setUpClass(cls):
        # Change into the parent of the directory containing this file.
        here = dirname(realpath(__file__))
        chdir(here + '/..')

    def setUp(self):
        # Start every test from a freshly initialized logging setup.
        disableLogging()
        initLogging()
 class CapturingTestCase(TestCase):
    """
    A TestCase that needs to capture stderr/stdout and invoke click CLI.
    """

    @pytest.fixture(autouse=True)
    def _setup_pytest_capfd(self, capfd):
        # Keep pytest's ``capfd`` fixture so the helpers below can read captured output.
        self.capfd = capfd

    def invoke_cli(self, cli, args):
        """
        Substitution for click.CliRunner.invoke that works together nicely
        with unittests/pytest capturing stdout/stderr.

        Returns:
            Tuple ``(code, out, err)``: the ``SystemExit`` code raised by the
            CLI (0 when it returned normally) and the output captured while it ran.
        """
        self.capture_out_err()  # XXX snapshot just before executing the CLI
        sys.argv[1:] = args # XXX necessary because sys.argv reflects pytest args not cli args
        try:
            cli.main(args=args)
        except SystemExit as exc:
            exit_code = exc.code
        else:
            exit_code = 0
        captured_out, captured_err = self.capture_out_err()
        return exit_code, captured_out, captured_err

    def capture_out_err(self):
        """Return what ``capfd`` has captured since the previous read."""
        return self.capfd.readouterr()
--- a/tests/test_dpi.py
+++ b/tests/test_dpi.py
@ -0,0 +1,10 @@
 from pathlib import Path
 from qurator.eynollah.utils.pil_cv2 import check_dpi
 from tests.base import main
 def test_dpi():
    """``check_dpi`` reports 300 for the bundled sample image."""
    resources = Path(__file__).parent / 'resources'
    image = resources / 'kant_aufklaerung_1784_0020.tif'
    detected = check_dpi(str(image))
    assert detected == 300
 # Allow running this test module directly as a script.
 if __name__ == '__main__':
    main(__file__)
--- a/tests/test_run.py
+++ b/tests/test_run.py
@ -0,0 +1,24 @@
 from os import environ
 from pathlib import Path
 from ocrd_utils import pushd_popd
 from tests.base import CapturingTestCase as TestCase, main
 from qurator.eynollah.cli import main as eynollah_cli
 # Absolute path of the directory containing this test module.
 testdir = Path(__file__).parent.resolve()
 # Model directory: taken from the EYNOLLAH_MODELS environment variable, falling
 # back to ``models_eynollah`` next to the tests directory.
 EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve()))
 class TestEynollahRun(TestCase):
    """End-to-end run of the eynollah CLI on the bundled sample image."""

    def test_full_run(self):
        # Run inside a temporary directory, which is also used as output directory.
        with pushd_popd(tempdir=True) as tempdir:
            sample_image = testdir.joinpath('resources/kant_aufklaerung_1784_0020.tif')
            cli_args = [
                '-m', EYNOLLAH_MODELS,
                '-i', str(sample_image),
                '-o', tempdir
            ]
            code, out, err = self.invoke_cli(eynollah_cli, cli_args)
            print(code, out, err)
            assert not code
 # Allow running this test module directly as a script.
 if __name__ == '__main__':
    main(__file__)
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@ -1,7 +1,7 @@
 def test_utils_import():
-    import sbb_newspapers_org_image.utils
+    import qurator.eynollah.utils
-    import sbb_newspapers_org_image.utils.contour
+    import qurator.eynollah.utils.contour
-    import sbb_newspapers_org_image.utils.drop_capitals
+    import qurator.eynollah.utils.drop_capitals
-    import sbb_newspapers_org_image.utils.drop_capitals
+    import qurator.eynollah.utils.drop_capitals
-    import sbb_newspapers_org_image.utils.is_nan
+    import qurator.eynollah.utils.is_nan
-    import sbb_newspapers_org_image.utils.rotate
+    import qurator.eynollah.utils.rotate
--- a/tests/test_xml.py
+++ b/tests/test_xml.py
@ -1,5 +1,5 @@
 from lxml import etree as ET
-from sbb_newspapers_org_image.utils.xml import create_page_xml, NAMESPACES
+from qurator.eynollah.utils.xml import create_page_xml, NAMESPACES
 def tostring(el):
    """Serialize ``el`` with ``ET.tostring`` and decode the bytes as UTF-8."""
    serialized = ET.tostring(el)
    return str(serialized, 'utf-8')
		`@ -0,0 +1 @@`
							`__import__("pkg_resources").declare_namespace(__name__)`