rename package to qurator.eynollah

2026-05-01 03:32:00 +02:00 · 2021-02-24 18:49:05 +01:00 · 2021-02-24 18:49:05 +01:00 · 58c4403e13
commit 58c4403e13
parent a96d23712d
22 changed files with 15 additions and 13 deletions
--- a/qurator/.gitkeep
+++ b/qurator/.gitkeep
--- a/qurator/init.py
+++ b/qurator/init.py
@ -0,0 +1 @@
+__import__("pkg_resources").declare_namespace(__name__)
--- a/qurator/eynollah/init.py
+++ b/qurator/eynollah/init.py
@ -0,0 +1 @@
+
--- a/qurator/eynollah/cli.py
+++ b/qurator/eynollah/cli.py
@ -0,0 +1,139 @@
+import sys
+import click
+from ocrd_utils import initLogging, setOverrideLogLevel
+from qurator.eynollah.eynollah import Eynollah
+
+
+# Command-line entry point. Each click option declared below is handed to
+# main() as the parameter of the same name (dashes become underscores).
+@click.command()
+@click.option(
+    "--image",
+    "-i",
+    help="image filename",
+    type=click.Path(exists=True, dir_okay=False),
+    required=True,
+)
+@click.option(
+    "--out",
+    "-o",
+    help="directory to write output xml data",
+    type=click.Path(exists=True, file_okay=False),
+    required=True,
+)
+@click.option(
+    "--model",
+    "-m",
+    help="directory of models",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--save_images",
+    "-si",
+    help="if a directory is given, images in documents will be cropped and saved there",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--save_layout",
+    "-sl",
+    help="if a directory is given, plot of layout will be saved there",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--save_deskewed",
+    "-sd",
+    help="if a directory is given, deskewed image will be saved there",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--save_all",
+    "-sa",
+    help="if a directory is given, all plots needed for documentation will be saved there",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--enable-plotting/--disable-plotting",
+    "-ep/-noep",
+    is_flag=True,
+    help="If set, will plot intermediary files and images",
+)
+@click.option(
+    "--allow-enhancement/--no-allow-enhancement",
+    "-ae/-noae",
+    is_flag=True,
+    help="if this parameter set to true, this tool would check that input image need resizing and enhancement or not. If so output of resized and enhanced image and corresponding layout data will be written in out directory",
+)
+@click.option(
+    "--curved-line/--no-curvedline",
+    "-cl/-nocl",
+    is_flag=True,
+    help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectabgle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.",
+)
+@click.option(
+    "--full-layout/--no-full-layout",
+    "-fl/-nofl",
+    is_flag=True,
+    help="if this parameter set to true, this tool will try to return all elements of layout.",
+)
+@click.option(
+    "--allow_scaling/--no-allow-scaling",
+    "-as/-noas",
+    is_flag=True,
+    help="if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection",
+)
+@click.option(
+    "--headers-off/--headers-on",
+    "-ho/-noho",
+    is_flag=True,
+    help="if this parameter set to true, this tool would ignore headers role in reading order",
+)
+@click.option(
+    "--log-level",
+    "-l",
+    type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
+    help="Override log level globally to this",
+)
+def main(
+    image,
+    out,
+    model,
+    save_images,
+    save_layout,
+    save_deskewed,
+    save_all,
+    enable_plotting,
+    allow_enhancement,
+    curved_line,
+    full_layout,
+    allow_scaling,
+    headers_off,
+    log_level
+):
+    # NOTE: documented with comments rather than a docstring on purpose:
+    # click shows a command function's docstring as its --help text.
+
+    # Install the log-level override (if one was requested) before logging is set up.
+    if log_level:
+        setOverrideLogLevel(log_level)
+    initLogging()
+    # Plotting and the save-directory options only make sense together:
+    # a save directory without -ep, or -ep without any save directory, is rejected.
+    if not enable_plotting and (save_layout or save_deskewed or save_all or save_images):
+        print("Error: You used one of -sl, -sd, -sa or -si but did not enable plotting with -ep")
+        sys.exit(1)
+    elif enable_plotting and not (save_layout or save_deskewed or save_all or save_images):
+        print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa or -si")
+        sys.exit(1)
+    # Arguments are passed positionally, so their order must match Eynollah.__init__.
+    # NOTE(review): the second argument is always None here; its meaning is
+    # defined by Eynollah.__init__, which is not visible in this file.
+    eynollah = Eynollah(
+        image,
+        None,
+        out,
+        model,
+        save_images,
+        save_layout,
+        save_deskewed,
+        save_all,
+        enable_plotting,
+        allow_enhancement,
+        curved_line,
+        full_layout,
+        allow_scaling,
+        headers_off,
+    )
+    # Run the pipeline and write its result through the writer attached to the Eynollah instance.
+    pcgts = eynollah.run()
+    eynollah.writer.write_pagexml(pcgts)
+
+# Allow running this module directly as a script.
+if __name__ == "__main__":
+    main()
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
--- a/qurator/eynollah/plot.py
+++ b/qurator/eynollah/plot.py
@ -0,0 +1,169 @@
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import numpy as np
+import os.path
+import cv2
+from scipy.ndimage import gaussian_filter1d
+
+from .utils import crop_image_inside_box
+from .utils.rotate import rotyate_image_different
+from .utils.resize import resize_image
+
+class EynollahPlotter():
+    """
+    Class collecting all the plotting and image writing methods
+
+    Every ``save_*`` / ``write_*`` method does nothing unless the output
+    directory it writes into was configured (is not ``None``). Figures are
+    closed once they have been saved, so repeated calls do not pile up open
+    pyplot figures.
+    """
+
+    def __init__(
+        self,
+        *,
+        dir_of_all,
+        dir_of_deskewed,
+        dir_of_layout,
+        dir_of_cropped_images,
+        image_filename,
+        image_filename_stem,
+        image_org=None,
+        scale_x=1,
+        scale_y=1,
+    ):
+        """Store the output directories, the file naming stem and the image/scale state.
+
+        All arguments are keyword-only. ``image_org``, ``scale_x`` and
+        ``scale_y`` are plain attributes that callers may overwrite later.
+        """
+        self.dir_of_all = dir_of_all
+        self.dir_of_layout = dir_of_layout
+        self.dir_of_cropped_images = dir_of_cropped_images
+        self.dir_of_deskewed = dir_of_deskewed
+        self.image_filename = image_filename
+        self.image_filename_stem = image_filename_stem
+        # XXX TODO hacky these cannot be set at init time
+        self.image_org = image_org
+        self.scale_x = scale_x
+        self.scale_y = scale_y
+
+    @staticmethod
+    def _legend_patches(im, values, pixels, values_indexes):
+        """Return one legend patch per entry of *values*, coloured the way *im* draws it."""
+        label_by_value = dict(zip(values_indexes, pixels))
+        patches = []
+        for value in np.asarray(values).tolist():
+            # A value without a configured name gets a generic label instead of
+            # raising IndexError and aborting the caller over a diagnostic plot.
+            label = label_by_value.get(value, "Unknown ({})".format(value))
+            patches.append(mpatches.Patch(color=im.cmap(im.norm(value)), label=label))
+        return patches
+
+    def _save_class_plot(self, class_map, pixels, values_indexes, out_dir, suffix, image_page, with_page):
+        """Draw *class_map* with a legend and save it as ``<stem><suffix>`` inside *out_dir*.
+
+        With *with_page* set, *image_page* is drawn in a left-hand panel, the
+        figure is twice as wide and the legend uses the larger font size.
+        """
+        values = np.unique(class_map[:, :])
+        fig = plt.figure(figsize=(80, 40) if with_page else (40, 40))
+        try:
+            plt.rcParams["font.size"] = "40"
+            if with_page:
+                plt.subplot(1, 2, 1)
+                plt.imshow(image_page)
+                plt.subplot(1, 2, 2)
+            im = plt.imshow(class_map[:, :])
+            patches = self._legend_patches(im, values, pixels, values_indexes)
+            plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60 if with_page else 40)
+            plt.savefig(os.path.join(out_dir, self.image_filename_stem + suffix))
+        finally:
+            # pyplot keeps every figure alive until it is closed explicitly.
+            plt.close(fig)
+
+    def save_plot_of_layout_main(self, text_regions_p, image_page):
+        """Save the early-layout class map to ``dir_of_layout`` (``image_page`` is not used)."""
+        if self.dir_of_layout is not None:
+            pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia']
+            values_indexes = [0, 1, 2, 3, 4]
+            self._save_class_plot(text_regions_p, pixels, values_indexes, self.dir_of_layout, "_layout_main.png", image_page, False)
+
+    def save_plot_of_layout_main_all(self, text_regions_p, image_page):
+        """Save the page next to the early-layout class map to ``dir_of_all``."""
+        if self.dir_of_all is not None:
+            pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia']
+            values_indexes = [0, 1, 2, 3, 4]
+            self._save_class_plot(text_regions_p, pixels, values_indexes, self.dir_of_all, "_layout_main_and_page.png", image_page, True)
+
+    def save_plot_of_layout(self, text_regions_p, image_page):
+        """Save the full-layout class map to ``dir_of_layout`` (``image_page`` is not used)."""
+        if self.dir_of_layout is not None:
+            pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
+            values_indexes = [0, 1, 2, 8, 4, 5, 6]
+            self._save_class_plot(text_regions_p, pixels, values_indexes, self.dir_of_layout, "_layout.png", image_page, False)
+
+    def save_plot_of_layout_all(self, text_regions_p, image_page):
+        """Save the page next to the full-layout class map to ``dir_of_all``."""
+        if self.dir_of_all is not None:
+            pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
+            values_indexes = [0, 1, 2, 8, 4, 5, 6]
+            self._save_class_plot(text_regions_p, pixels, values_indexes, self.dir_of_all, "_layout_and_page.png", image_page, True)
+
+    def save_plot_of_textlines(self, textline_mask_tot_ea, image_page):
+        """Save the page next to the textline mask to ``dir_of_all``."""
+        if self.dir_of_all is not None:
+            pixels = ["Background", "Textlines"]
+            values_indexes = [0, 1]
+            self._save_class_plot(textline_mask_tot_ea, pixels, values_indexes, self.dir_of_all, "_textline_and_page.png", image_page, True)
+
+    def save_deskewed_image(self, slope_deskew):
+        """Write ``image_org`` to ``dir_of_all`` and its rotated version to ``dir_of_deskewed``."""
+        if self.dir_of_all is not None:
+            cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_org.png"), self.image_org)
+        if self.dir_of_deskewed is not None:
+            img_rotated = rotyate_image_different(self.image_org, slope_deskew)
+            cv2.imwrite(os.path.join(self.dir_of_deskewed, self.image_filename_stem + "_deskewed.png"), img_rotated)
+
+    def save_page_image(self, image_page):
+        """Write *image_page* as ``<stem>_page.png`` into ``dir_of_all``."""
+        if self.dir_of_all is not None:
+            cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page)
+
+    def save_plot_of_textline_density(self, img_patch_org):
+        """Save *img_patch_org* next to its smoothed per-row sum profile to ``dir_of_all``."""
+        if self.dir_of_all is not None:
+            # Smooth the row sums once and reuse the profile for the curve and the ticks.
+            density = gaussian_filter1d(img_patch_org.sum(axis=1), 3)
+            fig = plt.figure(figsize=(80,40))
+            try:
+                plt.rcParams['font.size']='50'
+                plt.subplot(1,2,1)
+                plt.imshow(img_patch_org)
+                plt.subplot(1,2,2)
+                plt.plot(density, np.arange(len(density)), linewidth=8)
+                plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60)
+                plt.ylabel('Height',fontsize=60)
+                plt.yticks([0, len(density)])
+                plt.gca().invert_yaxis()
+                plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_density_of_textline.png'))
+            finally:
+                plt.close(fig)
+
+    def save_plot_of_rotation_angle(self, angels, var_res):
+        """Save the curve of *var_res* over *angels*, marking the maximum, to ``dir_of_all``."""
+        if self.dir_of_all is not None:
+            best = np.argmax(var_res)
+            fig = plt.figure(figsize=(60,30))
+            try:
+                plt.rcParams['font.size']='50'
+                plt.plot(angels, np.array(var_res), '-o', markersize=25, linewidth=4)
+                plt.xlabel('angle',fontsize=50)
+                plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50)
+                plt.plot(angels[best], var_res[best], '*', markersize=50, label='Angle of deskewing=' + "{:.2f}".format(angels[best]) + r'$\degree$')
+                plt.legend(loc='best')
+                plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_rotation_angle.png'))
+            finally:
+                plt.close(fig)
+
+    def write_images_into_directory(self, img_contoures, image_page):
+        """Crop the bounding box of every contour out of *image_page* and save it as a JPEG.
+
+        Each crop is resized by dividing its height by ``scale_y`` and its
+        width by ``scale_x``, then written to ``dir_of_cropped_images`` as
+        ``<stem>_<index>.jpg``.
+        """
+        if self.dir_of_cropped_images is not None:
+            for index, cont_ind in enumerate(img_contoures):
+                x, y, w, h = cv2.boundingRect(cont_ind)
+                croped_page, _ = crop_image_inside_box([x, y, w, h], image_page)
+
+                croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x))
+
+                path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg")
+                cv2.imwrite(path, croped_page)
+
--- a/qurator/eynollah/utils/init.py
+++ b/qurator/eynollah/utils/init.py
--- a/qurator/eynollah/utils/contour.py
+++ b/qurator/eynollah/utils/contour.py
@ -0,0 +1,238 @@
+import cv2
+import numpy as np
+from shapely import geometry
+
+from .rotate import rotate_image, rotation_image_new
+
+def contours_in_same_horizon(cy_main_hor):
+    """Group entries of *cy_main_hor* that lie within 20 units of one another.
+
+    For every index a list is collected holding that index and all indices
+    whose value differs by at most 20; the collected lists are then passed
+    through ``np.unique``.
+
+    NOTE(review): the shape of the result depends on whether all collected
+    lists have the same length -- confirm what callers expect before changing it.
+    """
+    count = len(cy_main_hor)
+
+    # Every row holds the complete vector, so subtracting the matrix from its
+    # transpose yields all pairwise absolute differences.
+    tiled = np.zeros((count, count))
+    tiled[0::1, :] = cy_main_hor[:]
+    pairwise_gap = np.abs(tiled.T - tiled)
+
+    positions = np.array(range(count))
+    groups = []
+    for row in range(count):
+        members = list(positions[pairwise_gap[row, :] <= 20])
+        members.append(row)
+        if len(members) > 1:
+            groups.append(list(set(members)))
+    return np.unique(groups)
+
+def find_contours_mean_y_diff(contours_main):
+    """Return the mean gap between the sorted vertical centroids of the given contours."""
+    centroid_ys = []
+    for contour in contours_main:
+        moments = cv2.moments(contour)
+        # The tiny epsilon keeps the division defined for zero-area contours.
+        centroid_ys.append(moments["m01"] / (moments["m00"] + 1e-32))
+    ordered = np.sort(np.array(centroid_ys))
+    return np.mean(np.diff(ordered))
+
+
+def get_text_region_boxes_by_given_contours(contours):
+    """Return ``(boxes, contours_new)`` for the given contours.
+
+    ``boxes`` holds one ``[x, y, w, h]`` list per contour as reported by
+    ``cv2.boundingRect``; ``contours_new`` is a new list with the same
+    contours in the same order.
+    """
+    boxes = []
+    contours_new = []
+    for idx in range(len(contours)):
+        contour = contours[idx]
+        boxes.append(list(cv2.boundingRect(contour)))
+        contours_new.append(contour)
+    return boxes, contours_new
+
+def filter_contours_area_of_image(image, contours, hirarchy, max_area, min_area):
+    """Keep parentless contours whose area is within a fraction range of the image area.
+
+    Args:
+        image: array whose first two ``shape`` entries give the reference area.
+        contours: contours to examine; entry ``i`` corresponds to ``hirarchy[0][i]``.
+        hirarchy: hierarchy belonging to *contours*; a contour is only kept
+            when field 3 of its entry is ``-1``.
+        max_area: upper bound, as a fraction of the image area (inclusive).
+        min_area: lower bound, as a fraction of the image area (inclusive).
+
+    Returns:
+        A list with one ``np.uint`` array per accepted contour, built from the
+        exterior coordinates of the contour's polygon.
+    """
+    image_area = np.prod(image.shape[:2])
+    found_polygons_early = []
+    # enumerate() keeps the hierarchy index tied to the contour's position even
+    # when a degenerate contour is skipped; a hand-advanced counter would fall
+    # behind after the first skip and read another contour's hierarchy entry.
+    for jv, c in enumerate(contours):
+        if len(c) < 3:  # A polygon cannot have less than 3 points
+            continue
+
+        polygon = geometry.Polygon([point[0] for point in c])
+        area = polygon.area
+        if min_area * image_area <= area <= max_area * image_area and hirarchy[0][jv][3] == -1:
+            found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint))
+    return found_polygons_early
+
+def filter_contours_area_of_image_tables(image, contours, hirarchy, max_area, min_area):
+    """Keep contours whose polygon area is within a fraction range of the image area.
+
+    ``min_area`` and ``max_area`` are fractions of the product of the first two
+    entries of ``image.shape``; both bounds are inclusive. ``hirarchy`` is
+    accepted for signature compatibility but is not consulted.
+
+    Returns a list with one ``np.int32`` array per accepted contour, built from
+    the exterior coordinates of the contour's polygon.
+    """
+    accepted = []
+    for contour in contours:
+        # A polygon needs at least three points.
+        if len(contour) < 3:
+            continue
+
+        polygon = geometry.Polygon([point[0] for point in contour])
+        polygon_area = polygon.area
+        lower = min_area * np.prod(image.shape[:2])
+        upper = max_area * np.prod(image.shape[:2])
+        if lower <= polygon_area <= upper:
+            exterior = [[point] for point in polygon.exterior.coords]
+            accepted.append(np.array(exterior, dtype=np.int32))
+    return accepted
+
+def find_new_features_of_contoures(contours_main):
+    """Compute centroids and coordinate extremes for each contour.
+
+    Contours are first read as 3-D arrays (indexed ``[:, 0, axis]``); if that
+    indexing is not possible they are read as 2-D arrays (indexed ``[:, axis]``).
+
+    Returns:
+        ``(cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main,
+        y_corr_x_min_from_argmin)``. The first two are lists of centroid
+        coordinates derived from ``cv2.moments``; the rest are arrays with one
+        entry per contour. ``y_corr_x_min_from_argmin`` is the y value of the
+        point with the smallest x.
+    """
+    moments = [cv2.moments(contour) for contour in contours_main]
+    # The tiny epsilon keeps the division defined for zero-area contours.
+    cx_main = [m["m10"] / (m["m00"] + 1e-32) for m in moments]
+    cy_main = [m["m01"] / (m["m00"] + 1e-32) for m in moments]
+
+    try:
+        xs = [contour[:, 0, 0] for contour in contours_main]
+        ys = [contour[:, 0, 1] for contour in contours_main]
+    except IndexError:
+        # The contours do not have the extra middle axis: read them as 2-D point lists.
+        xs = [contour[:, 0] for contour in contours_main]
+        ys = [contour[:, 1] for contour in contours_main]
+
+    x_min_main = np.array([np.min(x) for x in xs])
+    x_max_main = np.array([np.max(x) for x in xs])
+    y_min_main = np.array([np.min(y) for y in ys])
+    y_max_main = np.array([np.max(y) for y in ys])
+    y_corr_x_min_from_argmin = np.array([y[np.argmin(x)] for x, y in zip(xs, ys)])
+
+    return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
+
+def return_parent_contours(contours, hierarchy):
+    """Return the contours whose hierarchy entry has ``-1`` in field 3 (no parent)."""
+    contours_parent = []
+    for idx in range(len(contours)):
+        if hierarchy[0][idx][3] == -1:
+            contours_parent.append(contours[idx])
+    return contours_parent
+
+def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
+    """Return the parentless contours of all pixels equal to *pixel*.
+
+    For a 3-D input only channel 0 is compared. Contours are kept when their
+    area is between ``min_area`` and 1, both as fractions of the image area.
+    """
+    if len(region_pre_p.shape) == 3:
+        label_plane = region_pre_p[:, :, 0]
+    else:
+        label_plane = region_pre_p[:, :]
+
+    # Binary mask of the requested label, expanded to three identical channels
+    # so that it can go through the BGR -> gray conversion.
+    mask = ((label_plane == pixel) * 1).astype(np.uint8)
+    mask_bgr = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
+    gray = cv2.cvtColor(mask_bgr, cv2.COLOR_BGR2GRAY)
+    _, thresh = cv2.threshold(gray, 0, 255, 0)
+
+    found, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    top_level = return_parent_contours(found, hierarchy)
+    return filter_contours_area_of_image_tables(thresh, top_level, hierarchy, max_area=1, min_area=min_area)
+
+def get_textregion_contours_in_org_image(cnts, img, slope_first):
+    """Re-detect each contour after rotating its filled mask by ``-slope_first``.
+
+    Each contour is drawn on a blank canvas shaped like *img*, the canvas is
+    rotated with ``rotation_image_new``, and the first contour found in the
+    rotated mask is kept. Its coordinates are shifted by the absolute size
+    difference between the rotated canvas and *img*.
+
+    Returns a list with one contour per entry of *cnts*.
+    """
+    cnts_org = []
+    for cnt in cnts:
+        canvas = cv2.fillPoly(np.zeros(img.shape), pts=[cnt], color=(1, 1, 1))
+        rotated = rotation_image_new(canvas, -slope_first).astype(np.uint8)
+
+        gray = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
+        _, thresh = cv2.threshold(gray, 0, 255, 0)
+        found, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+
+        # Offset the coordinates by how much the rotated canvas differs in size from the original.
+        first = found[0]
+        first[:, 0, 0] = first[:, 0, 0] + np.abs(rotated.shape[1] - img.shape[1])
+        first[:, 0, 1] = first[:, 0, 1] + np.abs(rotated.shape[0] - img.shape[0])
+        cnts_org.append(first)
+    return cnts_org
+
+def return_contours_of_interested_textline(region_pre_p, pixel):
+    """Return the parentless contours of all pixels equal to *pixel*.
+
+    For a 3-D input only channel 0 is compared. Contours are kept when their
+    area is between 0.000000003 and 1, both as fractions of the image area.
+    """
+    if len(region_pre_p.shape) == 3:
+        label_plane = region_pre_p[:, :, 0]
+    else:
+        label_plane = region_pre_p[:, :]
+
+    # Binary mask of the requested label, expanded to three identical channels
+    # so that it can go through the BGR -> gray conversion.
+    mask = ((label_plane == pixel) * 1).astype(np.uint8)
+    mask_bgr = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
+    gray = cv2.cvtColor(mask_bgr, cv2.COLOR_BGR2GRAY)
+    _, thresh = cv2.threshold(gray, 0, 255, 0)
+
+    found, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    top_level = return_parent_contours(found, hierarchy)
+    return filter_contours_area_of_image_tables(thresh, top_level, hierarchy, max_area=1, min_area=0.000000003)
+
+def return_contours_of_image(image):
+    """Return ``(contours, hierarchy)`` found in the thresholded gray version of *image*.
+
+    A 2-D input is first expanded to three identical channels; the image is
+    cast to ``uint8`` before the gray conversion.
+    """
+    if len(image.shape) == 2:
+        image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
+    image = image.astype(np.uint8)
+
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    _, thresh = cv2.threshold(gray, 0, 255, 0)
+    contours, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    return contours, hierachy
+
+def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
+    """Return the parentless contours of all pixels equal to *pixel*.
+
+    For a 3-D input only channel 0 is compared. Contours are kept when their
+    area is between ``min_size`` and 1, both as fractions of the image area.
+    """
+    if len(region_pre_p.shape) == 3:
+        label_plane = region_pre_p[:, :, 0]
+    else:
+        label_plane = region_pre_p[:, :]
+
+    # Binary mask of the requested label, expanded to three identical channels
+    # so that it can go through the BGR -> gray conversion.
+    mask = ((label_plane == pixel) * 1).astype(np.uint8)
+    mask_bgr = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
+    gray = cv2.cvtColor(mask_bgr, cv2.COLOR_BGR2GRAY)
+    _, thresh = cv2.threshold(gray, 0, 255, 0)
+
+    found, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    top_level = return_parent_contours(found, hierarchy)
+    return filter_contours_area_of_image_tables(thresh, top_level, hierarchy, max_area=1, min_area=min_size)
+
+def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
+    """Return a 2-D mask of the *pixel* regions whose area lies within the given range.
+
+    For a 3-D input only channel 0 is compared. Parentless contours with an
+    area between ``min_area`` and ``max_area`` (fractions of the image area)
+    are filled with 1 on a zero canvas; the first channel of that canvas is
+    returned.
+    """
+    if len(region_pre_p.shape) == 3:
+        label_plane = region_pre_p[:, :, 0]
+    else:
+        label_plane = region_pre_p[:, :]
+
+    # Binary mask of the requested label, expanded to three identical channels
+    # so that it can go through the BGR -> gray conversion.
+    mask = ((label_plane == pixel) * 1).astype(np.uint8)
+    mask_bgr = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
+    gray = cv2.cvtColor(mask_bgr, cv2.COLOR_BGR2GRAY)
+    _, thresh = cv2.threshold(gray, 0, 255, 0)
+
+    found, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    top_level = return_parent_contours(found, hierarchy)
+    kept = filter_contours_area_of_image_tables(thresh, top_level, hierarchy, max_area=max_area, min_area=min_area)
+
+    canvas = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3))
+    canvas = cv2.fillPoly(canvas, pts=kept, color=(1, 1, 1))
+    return canvas[:, :, 0]
+
--- a/qurator/eynollah/utils/drop_capitals.py
+++ b/qurator/eynollah/utils/drop_capitals.py
@ -0,0 +1,501 @@
+import numpy as np
+import cv2
+from .contour import (
+    find_new_features_of_contoures,
+    return_contours_of_image,
+    return_parent_contours,
+)
+
+def adhere_drop_capital_region_into_cprresponding_textline(
+    text_regions_p,
+    polygons_of_drop_capitals,
+    contours_only_text_parent,
+    contours_only_text_parent_h,
+    all_box_coord,
+    all_box_coord_h,
+    all_found_texline_polygons,
+    all_found_texline_polygons_h,
+    kernel=None,
+    curved_line=False,
+):
+    # print(np.shape(all_found_texline_polygons),np.shape(all_found_texline_polygons[3]),'all_found_texline_polygonsshape')
+    # print(all_found_texline_polygons[3])
+    cx_m, cy_m, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent)
+    cx_h, cy_h, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent_h)
+    cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contoures(polygons_of_drop_capitals)
+
+    img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
+    for j_cont in range(len(contours_only_text_parent)):
+        img_con_all[all_box_coord[j_cont][0] : all_box_coord[j_cont][1], all_box_coord[j_cont][2] : all_box_coord[j_cont][3], 0] = (j_cont + 1) * 3
+        # img_con_all=cv2.fillPoly(img_con_all,pts=[contours_only_text_parent[j_cont]],color=((j_cont+1)*3,(j_cont+1)*3,(j_cont+1)*3))
+
+    # plt.imshow(img_con_all[:,:,0])
+    # plt.show()
+    # img_con_all=cv2.dilate(img_con_all, kernel, iterations=3)
+
+    # plt.imshow(img_con_all[:,:,0])
+    # plt.show()
+    # print(np.unique(img_con_all[:,:,0]))
+    for i_drop in range(len(polygons_of_drop_capitals)):
+        # print(i_drop,'i_drop')
+        img_con_all_copy = np.copy(img_con_all)
+        img_con = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
+        img_con = cv2.fillPoly(img_con, pts=[polygons_of_drop_capitals[i_drop]], color=(1, 1, 1))
+
+        # plt.imshow(img_con[:,:,0])
+        # plt.show()
+        ##img_con=cv2.dilate(img_con, kernel, iterations=30)
+
+        # plt.imshow(img_con[:,:,0])
+        # plt.show()
+
+        # print(np.unique(img_con[:,:,0]))
+
+        img_con_all_copy[:, :, 0] = img_con_all_copy[:, :, 0] + img_con[:, :, 0]
+
+        img_con_all_copy[:, :, 0][img_con_all_copy[:, :, 0] == 1] = 0
+
+        kherej_ghesmat = np.unique(img_con_all_copy[:, :, 0]) / 3
+        res_summed_pixels = np.unique(img_con_all_copy[:, :, 0]) % 3
+        region_with_intersected_drop = kherej_ghesmat[res_summed_pixels == 1]
+        # region_with_intersected_drop=region_with_intersected_drop/3
+        region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8)
+
+        # print(len(region_with_intersected_drop),'region_with_intersected_drop1')
+        if len(region_with_intersected_drop) == 0:
+            img_con_all_copy = np.copy(img_con_all)
+            img_con = cv2.dilate(img_con, kernel, iterations=4)
+
+            img_con_all_copy[:, :, 0] = img_con_all_copy[:, :, 0] + img_con[:, :, 0]
+
+            img_con_all_copy[:, :, 0][img_con_all_copy[:, :, 0] == 1] = 0
+
+            kherej_ghesmat = np.unique(img_con_all_copy[:, :, 0]) / 3
+            res_summed_pixels = np.unique(img_con_all_copy[:, :, 0]) % 3
+            region_with_intersected_drop = kherej_ghesmat[res_summed_pixels == 1]
+            # region_with_intersected_drop=region_with_intersected_drop/3
+            region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8)
+        # print(np.unique(img_con_all_copy[:,:,0]))
+        if curved_line:
+
+            if len(region_with_intersected_drop) > 1:
+                sum_pixels_of_intersection = []
+                for i in range(len(region_with_intersected_drop)):
+                    # print((region_with_intersected_drop[i]*3+1))
+                    sum_pixels_of_intersection.append(((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum())
+                # print(sum_pixels_of_intersection)
+                region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
+
+                # print(region_final,'region_final')
+                # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+                try:
+                    cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+                    # print(all_box_coord[j_cont])
+                    # print(cx_t)
+                    # print(cy_t)
+                    # print(cx_d[i_drop])
+                    # print(cy_d[i_drop])
+                    y_lines = np.array(cy_t)  # all_box_coord[int(region_final)][0]+np.array(cy_t)
+
+                    # print(y_lines)
+
+                    y_lines[y_lines < y_min_d[i_drop]] = 0
+                    # print(y_lines)
+
+                    arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
+                    # print(arg_min)
+
+                    cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
+                    cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0]  # +all_box_coord[int(region_final)][2]
+                    cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1]  # +all_box_coord[int(region_final)][0]
+
+                    img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
+                    img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
+                    img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
+
+                    img_textlines = img_textlines.astype(np.uint8)
+                    imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
+                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+                    contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+
+                    # print(len(contours_combined),'len textlines mixed')
+                    areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
+
+                    contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
+
+                    # print(np.shape(contours_biggest))
+                    # print(contours_biggest[:])
+                    # contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
+                    # contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
+
+                    # contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
+
+                    all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
+
+                except:
+                    # print('gordun1')
+                    pass
+            elif len(region_with_intersected_drop) == 1:
+                region_final = region_with_intersected_drop[0] - 1
+
+                # areas_main=np.array([cv2.contourArea(all_found_texline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_texline_polygons[int(region_final)]))])
+
+                # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+
+                cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+                # print(all_box_coord[j_cont])
+                # print(cx_t)
+                # print(cy_t)
+                # print(cx_d[i_drop])
+                # print(cy_d[i_drop])
+                y_lines = np.array(cy_t)  # all_box_coord[int(region_final)][0]+np.array(cy_t)
+
+                y_lines[y_lines < y_min_d[i_drop]] = 0
+                # print(y_lines)
+
+                arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
+                # print(arg_min)
+
+                cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
+                cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0]  # +all_box_coord[int(region_final)][2]
+                cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1]  # +all_box_coord[int(region_final)][0]
+
+                img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
+                img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
+                img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
+
+                img_textlines = img_textlines.astype(np.uint8)
+
+                # plt.imshow(img_textlines)
+                # plt.show()
+                imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
+                ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+                contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+
+                # print(len(contours_combined),'len textlines mixed')
+                areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
+
+                contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
+
+                # print(np.shape(contours_biggest))
+                # print(contours_biggest[:])
+                # contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
+                # contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
+                # print(np.shape(contours_biggest),'contours_biggest')
+                # print(np.shape(all_found_texline_polygons[int(region_final)][arg_min]))
+                ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
+                all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
+
+                # print(cx_t,'print')
+                try:
+                    # print(all_found_texline_polygons[j_cont][0])
+                    cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+                    # print(all_box_coord[j_cont])
+                    # print(cx_t)
+                    # print(cy_t)
+                    # print(cx_d[i_drop])
+                    # print(cy_d[i_drop])
+                    y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
+
+                    y_lines[y_lines < y_min_d[i_drop]] = 0
+                    # print(y_lines)
+
+                    arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
+                    # print(arg_min)
+
+                    cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
+                    cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0]  # +all_box_coord[int(region_final)][2]
+                    cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1]  # +all_box_coord[int(region_final)][0]
+
+                    img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
+                    img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
+                    img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
+
+                    img_textlines = img_textlines.astype(np.uint8)
+                    imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
+                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+                    contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+
+                    # print(len(contours_combined),'len textlines mixed')
+                    areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
+
+                    contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
+
+                    # print(np.shape(contours_biggest))
+                    # print(contours_biggest[:])
+                    contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0]  # -all_box_coord[int(region_final)][2]
+                    contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1]  # -all_box_coord[int(region_final)][0]
+
+                    ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
+                    all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
+                    # all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
+
+                except:
+                    pass
+            else:
+                pass
+
+            ##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+            ###print(all_box_coord[j_cont])
+            ###print(cx_t)
+            ###print(cy_t)
+            ###print(cx_d[i_drop])
+            ###print(cy_d[i_drop])
+            ##y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t)
+
+            ##y_lines[y_lines<y_min_d[i_drop]]=0
+            ###print(y_lines)
+
+            ##arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop])  )
+            ###print(arg_min)
+
+            ##cnt_nearest=np.copy(all_found_texline_polygons[int(region_final)][arg_min])
+            ##cnt_nearest[:,0,0]=all_found_texline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
+            ##cnt_nearest[:,0,1]=all_found_texline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
+
+            ##img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
+            ##img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
+            ##img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
+
+            ##img_textlines=img_textlines.astype(np.uint8)
+
+            ##plt.imshow(img_textlines)
+            ##plt.show()
+            ##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
+            ##ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+            ##contours_combined,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
+
+            ##print(len(contours_combined),'len textlines mixed')
+            ##areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
+
+            ##contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
+
+            ###print(np.shape(contours_biggest))
+            ###print(contours_biggest[:])
+            ##contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
+            ##contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
+
+            ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
+            ##all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
+
+        else:
+            if len(region_with_intersected_drop) > 1:
+                sum_pixels_of_intersection = []
+                for i in range(len(region_with_intersected_drop)):
+                    # print((region_with_intersected_drop[i]*3+1))
+                    sum_pixels_of_intersection.append(((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum())
+                # print(sum_pixels_of_intersection)
+                region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
+
+                # print(region_final,'region_final')
+                # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+                try:
+                    cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+                    # print(all_box_coord[j_cont])
+                    # print(cx_t)
+                    # print(cy_t)
+                    # print(cx_d[i_drop])
+                    # print(cy_d[i_drop])
+                    y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
+
+                    # print(y_lines)
+
+                    y_lines[y_lines < y_min_d[i_drop]] = 0
+                    # print(y_lines)
+
+                    arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
+                    # print(arg_min)
+
+                    cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
+                    cnt_nearest[:, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
+                    cnt_nearest[:, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]
+
+                    img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
+                    img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
+                    img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
+
+                    img_textlines = img_textlines.astype(np.uint8)
+                    imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
+                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+                    contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+
+                    # print(len(contours_combined),'len textlines mixed')
+                    areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
+
+                    contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
+
+                    # print(np.shape(contours_biggest))
+                    # print(contours_biggest[:])
+                    contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] - all_box_coord[int(region_final)][2]
+                    contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0]
+
+                    contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
+
+                    all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
+
+                except:
+                    # print('gordun1')
+                    pass
+            elif len(region_with_intersected_drop) == 1:
+                region_final = region_with_intersected_drop[0] - 1
+
+                # areas_main=np.array([cv2.contourArea(all_found_texline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_texline_polygons[int(region_final)]))])
+
+                # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+
+                # print(cx_t,'print')
+                try:
+                    # print(all_found_texline_polygons[j_cont][0])
+                    cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
+                    # print(all_box_coord[j_cont])
+                    # print(cx_t)
+                    # print(cy_t)
+                    # print(cx_d[i_drop])
+                    # print(cy_d[i_drop])
+                    y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
+
+                    y_lines[y_lines < y_min_d[i_drop]] = 0
+                    # print(y_lines)
+
+                    arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
+                    # print(arg_min)
+
+                    cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
+                    cnt_nearest[:, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
+                    cnt_nearest[:, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]
+
+                    img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
+                    img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
+                    img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
+
+                    img_textlines = img_textlines.astype(np.uint8)
+                    imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
+                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+                    contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+
+                    # print(len(contours_combined),'len textlines mixed')
+                    areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
+
+                    contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
+
+                    # print(np.shape(contours_biggest))
+                    # print(contours_biggest[:])
+                    contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] - all_box_coord[int(region_final)][2]
+                    contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0]
+
+                    contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
+                    all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
+                    # all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
+
+                except:
+                    pass
+            else:
+                pass
+
+    #####for i_drop in range(len(polygons_of_drop_capitals)):
+    #####for j_cont in range(len(contours_only_text_parent)):
+    #####img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
+    #####img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
+    #####img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255))
+
+    #####img_con=img_con.astype(np.uint8)
+    ######imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY)
+    ######ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+    ######contours_new,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
+
+    #####contours_new,hir_new=return_contours_of_image(img_con)
+    #####contours_new_parent=return_parent_contours( contours_new,hir_new)
+    ######plt.imshow(img_con)
+    ######plt.show()
+    #####try:
+    #####if len(contours_new_parent)==1:
+    ######print(all_found_texline_polygons[j_cont][0])
+    #####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[j_cont])
+    ######print(all_box_coord[j_cont])
+    ######print(cx_t)
+    ######print(cy_t)
+    ######print(cx_d[i_drop])
+    ######print(cy_d[i_drop])
+    #####y_lines=all_box_coord[j_cont][0]+np.array(cy_t)
+
+    ######print(y_lines)
+
+    #####arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop])  )
+    ######print(arg_min)
+
+    #####cnt_nearest=np.copy(all_found_texline_polygons[j_cont][arg_min])
+    #####cnt_nearest[:,0]=all_found_texline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
+    #####cnt_nearest[:,1]=all_found_texline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
+
+    #####img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
+    #####img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
+    #####img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
+
+    #####img_textlines=img_textlines.astype(np.uint8)
+    #####imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
+    #####ret, thresh = cv2.threshold(imgray, 0, 255, 0)
+
+    #####contours_combined,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
+
+    #####areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
+
+    #####contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
+
+    ######print(np.shape(contours_biggest))
+    ######print(contours_biggest[:])
+    #####contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
+    #####contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
+
+    #####all_found_texline_polygons[j_cont][arg_min]=contours_biggest
+    ######print(contours_biggest)
+    ######plt.imshow(img_textlines[:,:,0])
+    ######plt.show()
+    #####else:
+    #####pass
+    #####except:
+    #####pass
+    return all_found_texline_polygons
+
+def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
+    """Re-label drop capitals (label 4) in ``layout_no_patch``, keeping only plausible ones.
+
+    Every pixel whose first channel equals 4 is treated as a drop-capital
+    candidate.  A candidate contour survives only if
+
+    * its area is more than 0.1 % of the image area,
+    * it fills more than 60 % of its bounding box,
+    * its bounding box satisfies ``w / h < 1.2`` and ``h / w < 2``, and
+    * inside that bounding box the number of ``layout1`` pixels equal to 1 is
+      at least 15 % of the number of pixels equal to 5.
+
+    Label 4 is then erased from the first channel and the surviving contours
+    are painted back with ``(4, 4, 4)``.
+
+    Args:
+        layout_no_patch: label image indexed as ``[:, :, 0]``; modified in place.
+        layout1: label image indexed as ``layout1[y, x]`` (assumed to be 2-D and
+            pixel-aligned with ``layout_no_patch`` -- TODO confirm with caller).
+
+    Returns:
+        The array returned by ``cv2.fillPoly`` for ``layout_no_patch``.
+    """
+    drop_only = (layout_no_patch[:, :, 0] == 4) * 1
+    contours_drop, hir_on_drop = return_contours_of_image(drop_only)
+    contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
+
+    image_area = float(drop_only.shape[0] * drop_only.shape[1])
+    area_fractions = np.array([cv2.contourArea(contour) for contour in contours_drop_parent]) / image_area
+
+    contours_drop_parent_final = []
+
+    for contour, area_fraction in zip(contours_drop_parent, area_fractions):
+        # Ignore specks: contours must cover more than 0.1 % of the image.
+        if not area_fraction > 0.001:
+            continue
+
+        x, y, w, h = cv2.boundingRect(contour)
+
+        # Percentage of the bounding box that the contour fills.
+        fill_percent = image_area * area_fraction / float(w * h) * 100
+        height_to_width_ratio = h / float(w)
+        width_to_height_ratio = w / float(h)
+
+        if not (fill_percent > 60 and width_to_height_ratio < 1.2 and height_to_width_ratio < 2):
+            continue
+
+        # Counting on the crop gives the same numbers as copying the crop into
+        # a full-size zero map first, without one full-image allocation per contour.
+        labels_in_box = layout1[y : y + h, x : x + w]
+        count_label_1 = (labels_in_box == 1).sum()
+        count_label_5 = (labels_in_box == 5).sum()
+
+        if count_label_5 == 0:
+            # The former float division produced inf (accepted) when label-1
+            # pixels were present and nan (rejected) otherwise, together with a
+            # NumPy RuntimeWarning.  Keep those outcomes, but explicitly.
+            keep = count_label_1 > 0
+        else:
+            keep = count_label_1 / float(count_label_5) * 100 >= 15
+
+        if keep:
+            contours_drop_parent_final.append(contour)
+
+    layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0
+
+    layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4))
+
+    return layout_no_patch
+
--- a/qurator/eynollah/utils/is_nan.py
+++ b/qurator/eynollah/utils/is_nan.py
@ -0,0 +1,3 @@
+
+def isNaN(num):
+    """Return the result of ``num != num``.
+
+    A floating-point NaN compares unequal to itself, so for a plain float this
+    is ``True`` exactly when ``num`` is NaN.  For other types the result is
+    whatever their ``!=`` operator yields.
+    """
+    return num != num
--- a/qurator/eynollah/utils/marginals.py
+++ b/qurator/eynollah/utils/marginals.py
@ -0,0 +1,252 @@
+import numpy as np
+import cv2
+from scipy.signal import find_peaks
+from scipy.ndimage import gaussian_filter1d
+
+
+from .contour import find_new_features_of_contoures, return_contours_of_interested_region
+from .resize import resize_image
+from .rotate import rotate_image
+
+def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None):
+    """Re-label text lying outside the central column band and trim text beyond it.
+
+    Outline of what the code does:
+
+    1. Erode ``text_with_lines`` (for taller inputs after stretching it
+       vertically and shrinking it back) and sum every column.
+    2. If the tallest eroded column covers less than 14 % of the image height,
+       return ``text_regions`` untouched.
+    3. Otherwise look for minima of the column profile between the first and
+       last non-empty column and pick a left and a right cut position from
+       them, depending on ``num_col``.
+    4. Build a mask that is 1 between the two cuts, rotate it by
+       ``-slope_deskew`` and set every ``text_regions`` pixel equal to 1 that
+       lies outside the rotated mask to 4.
+    5. Reset label 4 to 1, then paint back with 4 only those label-4 contours
+       that are wider than 16 px and whose height/width ratio is below 18.
+    6. Set remaining label-1 pixels to 0 left of / right of the rotated mask
+       extent (widened by 16 px on each side).
+
+    ``text_regions`` is modified in place and also returned.
+
+    Parameters
+    ----------
+    text_with_lines : array, indexed as ``shape[0]`` x ``shape[1]``; cast to uint8.
+    text_regions : label image combined pixel-wise with the rotated mask, so it
+        presumably has the same height and width as ``text_with_lines`` -- TODO confirm.
+    num_col : only the values 1 and 2 select cut candidates (see NOTE below).
+    slope_deskew : angle passed (negated) to ``rotate_image``.
+    kernel : structuring element handed to ``cv2.erode``; with the default
+        ``None`` OpenCV presumably falls back to its built-in element -- TODO confirm.
+    """
+    mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1]))
+    mask_marginals=mask_marginals.astype(np.uint8)
+
+    text_with_lines=text_with_lines.astype(np.uint8)
+
+    # Reference erosion at the original size; used below for the thickness
+    # estimate and as the size to resize back to.
+    text_with_lines_eroded=cv2.erode(text_with_lines,kernel,iterations=5)
+
+    # Taller inputs are stretched vertically before eroding and then resized
+    # back to the original size.
+    if text_with_lines.shape[0]<=1500:
+        pass
+    elif text_with_lines.shape[0]>1500 and text_with_lines.shape[0]<=1800:
+        text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1])
+        text_with_lines=cv2.erode(text_with_lines,kernel,iterations=5)
+        text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])
+    else:
+        text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1])
+        text_with_lines=cv2.erode(text_with_lines,kernel,iterations=7)
+        text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])
+
+    # Despite the "_y" suffix these are per-column sums (one value per x position).
+    text_with_lines_y=text_with_lines.sum(axis=0)
+    text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0)
+
+    # Largest eroded column sum as a percentage of the image height.
+    thickness_along_y_percent=text_with_lines_y_eroded.max()/(float(text_with_lines.shape[0]))*100
+
+    # Upper bound on the smoothed column sum for a minimum to count as a gap.
+    if thickness_along_y_percent<30:
+        min_textline_thickness=8
+    elif thickness_along_y_percent>=30 and thickness_along_y_percent<50:
+        min_textline_thickness=20
+    else:
+        min_textline_thickness=40
+
+    if thickness_along_y_percent>=14:
+
+        # Inverted profile shifted to be non-negative: its peaks are the
+        # minima of the column sums.
+        text_with_lines_y_rev=-1*text_with_lines_y[:]
+
+        text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev)
+
+        sigma_gaus=1
+        region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus)
+
+        # NOTE(review): region_sum_0_rev is computed but never read afterwards.
+        region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus)
+
+        # Reversed copy of the smoothed profile, used to find the last non-zero column.
+        region_sum_0_updown=region_sum_0[len(region_sum_0)::-1]
+
+        first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None))
+        last_nonzero=(next((i for i, x in enumerate(region_sum_0_updown) if x), None))
+
+        # Convert the offset from the right end into an index counted from the left.
+        last_nonzero=len(region_sum_0)-last_nonzero
+
+        mid_point=(last_nonzero+first_nonzero)/2.
+
+        one_third_right=(last_nonzero-mid_point)/3.0
+        one_third_left=(mid_point-first_nonzero)/3.0
+
+        # Peaks are searched on the unsmoothed inverted profile.
+        peaks, _ = find_peaks(text_with_lines_y_rev, height=0)
+
+        peaks=np.array(peaks)
+
+        # Keep only minima strictly inside the non-empty column range ...
+        peaks=peaks[(peaks>first_nonzero) & ((peaks<last_nonzero))]
+
+        # ... and only those where the smoothed column sum is below the threshold.
+        peaks=peaks[region_sum_0[peaks]<min_textline_thickness ]
+
+        # NOTE(review): for num_col values other than 1 or 2 neither
+        # peaks_right nor peaks_left is assigned; the bare excepts below then
+        # fall back to last_nonzero / first_nonzero.
+        if num_col==1:
+            peaks_right=peaks[peaks>mid_point]
+            peaks_left=peaks[peaks<mid_point]
+        if num_col==2:
+            peaks_right=peaks[peaks>(mid_point+one_third_right)]
+            peaks_left=peaks[peaks<(mid_point-one_third_left)]
+
+        # Right cut: left-most candidate on the right side, else the last non-empty column.
+        try:
+            point_right=np.min(peaks_right)
+        except:
+            point_right=last_nonzero
+
+        # Left cut: right-most candidate on the left side, else the first non-empty column.
+        try:
+            point_left=np.max(peaks_left)
+        except:
+            point_left=first_nonzero
+
+        if point_right>=mask_marginals.shape[1]:
+            point_right=mask_marginals.shape[1]-1
+
+        # Mark the columns between the two cuts; on any error mark everything.
+        try:
+            mask_marginals[:,point_left:point_right]=1
+        except:
+            mask_marginals[:,:]=1
+
+        mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew)
+
+        # Horizontal extent of the rotated mask: columns containing any mask pixel.
+        mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0)
+
+        mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1
+        # NOTE(review): index_x is 1-based (range + 1), so the extent below is
+        # shifted by one column relative to array indices.
+        index_x=np.array(range(len(mask_marginals_rotated_sum)))+1
+
+        index_x_interest=index_x[mask_marginals_rotated_sum==1]
+
+        # Widen the extent by 16 px on each side, clamped to the image width.
+        min_point_of_left_marginal=np.min(index_x_interest)-16
+        max_point_of_right_marginal=np.max(index_x_interest)+16
+
+        if min_point_of_left_marginal<0:
+            min_point_of_left_marginal=0
+        if max_point_of_right_marginal>=text_regions.shape[1]:
+            max_point_of_right_marginal=text_regions.shape[1]-1
+
+        # Temporarily tag label-1 pixels outside the rotated mask with label 4.
+        text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4
+
+        pixel_img=4
+        min_area_text=0.00001
+        polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text)
+
+        # Judging by the names unpacked here the helper yields per-contour
+        # centres and x/y extents -- TODO confirm against its definition.
+        cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contoures(polygons_of_marginals)
+
+        # Undo the temporary tagging; selected contours are painted back below.
+        text_regions[(text_regions[:,:]==4)]=1
+
+        marginlas_should_be_main_text=[]
+
+        # NOTE(review): these two lists are filled below but only referenced
+        # from commented-out code afterwards.
+        x_min_marginals_left=[]
+        x_min_marginals_right=[]
+
+        for i in range(len(cx_text_only)):
+
+            x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i])
+            y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i])
+            # Select contours wider than 16 px that are less than 18 times as tall as wide.
+            if x_width_mar>16 and y_height_mar/x_width_mar<18:
+                marginlas_should_be_main_text.append(polygons_of_marginals[i])
+                if x_min_text_only[i]<(mid_point-one_third_left):
+                    x_min_marginals_left_new=x_min_text_only[i]
+                    if len(x_min_marginals_left)==0:
+                        x_min_marginals_left.append(x_min_marginals_left_new)
+                    else:
+                        x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new)
+                else:
+                    x_min_marginals_right_new=x_min_text_only[i]
+                    if len(x_min_marginals_right)==0:
+                        x_min_marginals_right.append(x_min_marginals_right_new)
+                    else:
+                        x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new)
+
+        if len(x_min_marginals_left)==0:
+            x_min_marginals_left=[0]
+        if len(x_min_marginals_right)==0:
+            x_min_marginals_right=[text_regions.shape[1]-1]
+
+        # Paint the selected contours with label 4.
+        text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4))
+
+        #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0
+        #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0
+
+        # Remove label-1 pixels that remain outside the widened mask extent.
+        text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0
+        text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0
+
+        ###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4
+
+        ###text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4
+    else:
+        pass
+    return text_regions
--- a/qurator/eynollah/utils/pil_cv2.py
+++ b/qurator/eynollah/utils/pil_cv2.py
@ -0,0 +1,24 @@
+from PIL import Image
+import numpy as np
+from ocrd_models import OcrdExif
+from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor, imread
+
+# from sbb_binarization
+
+def cv2pil(img):
+    """Build a PIL image from an array after casting it to ``uint8``.
+
+    No channel reordering is performed here.
+    """
+    as_uint8 = img.astype('uint8')
+    return Image.fromarray(as_uint8)
+
+def pil2cv(img):
+    """Convert a PIL image into a BGR array via ``cvtColor``.
+
+    Images in mode ``'1'`` or ``'L'`` are converted with ``COLOR_GRAY2BGR``,
+    every other mode with ``COLOR_RGB2BGR``.  Mode ``'1'`` pixel data is cast
+    to ``uint8`` first.
+    """
+    # Logic taken from ocrd/workspace.py
+    if img.mode in ('1', 'L'):
+        conversion = COLOR_GRAY2BGR
+    else:
+        conversion = COLOR_RGB2BGR
+
+    pixels = np.array(img)
+    if img.mode == '1':
+        pixels = pixels.astype('uint8')
+
+    return cvtColor(pixels, conversion)
+
+def check_dpi(image_filename):
+    """Read the resolution recorded for an image file and return it as an int.
+
+    The image is opened with PIL and inspected through ``OcrdExif``; the
+    metadata is also printed as XML.  When the recorded unit is ``'cm'`` the
+    value is divided by 2.54 before truncation to ``int``.
+
+    Args:
+        image_filename: path of the image file to inspect.
+
+    Returns:
+        The (possibly converted) resolution truncated to an integer.
+    """
+    # Use the image as a context manager so the underlying file handle is
+    # released as soon as the metadata has been read, instead of lingering
+    # until garbage collection.
+    with Image.open(image_filename) as pil_image:
+        exif = OcrdExif(pil_image)
+        print(exif.to_xml())
+        resolution = exif.resolution
+        if exif.resolutionUnit == 'cm':
+            # NOTE(review): converting pixels-per-cm to pixels-per-inch would
+            # normally be a multiplication by 2.54; confirm the unit of
+            # ``exif.resolution`` before relying on this division.
+            resolution /= 2.54
+        return int(resolution)
+
--- a/qurator/eynollah/utils/resize.py
+++ b/qurator/eynollah/utils/resize.py
@ -0,0 +1,4 @@
+import cv2
+
+def resize_image(img_in, input_height, input_width):
+    """Resize ``img_in`` to ``input_height`` x ``input_width`` with nearest-neighbour interpolation."""
+    # cv2.resize expects the target size as (width, height).
+    target_size = (input_width, input_height)
+    return cv2.resize(img_in, target_size, interpolation=cv2.INTER_NEAREST)
--- a/qurator/eynollah/utils/rotate.py
+++ b/qurator/eynollah/utils/rotate.py
@ -0,0 +1,85 @@
+import math
+
+import imutils
+import cv2
+
+def rotatedRectWithMaxArea(w, h, angle):
+    """Return ``(width, height)`` of the largest axis-aligned rectangle inside a rotated ``w`` x ``h`` rectangle.
+
+    ``angle`` is given in radians.  A non-positive ``w`` or ``h`` yields ``(0, 0)``.
+    """
+    if w <= 0 or h <= 0:
+        return 0, 0
+
+    landscape = w >= h
+    if landscape:
+        longer, shorter = w, h
+    else:
+        longer, shorter = h, w
+
+    # The result is the same for angle, -angle and 180-angle, so working with
+    # the absolute values of sine and cosine (first quadrant) is sufficient.
+    sin_a = abs(math.sin(angle))
+    cos_a = abs(math.cos(angle))
+
+    half_constrained = shorter <= 2.0 * sin_a * cos_a * longer or abs(sin_a - cos_a) < 1e-10
+    if not half_constrained:
+        # Fully constrained case: the crop touches all four sides.
+        cos_2a = cos_a * cos_a - sin_a * sin_a
+        return (w * cos_a - h * sin_a) / cos_2a, (h * cos_a - w * sin_a) / cos_2a
+
+    # Half constrained case: two crop corners touch the longer side, the other
+    # two lie on the mid-line parallel to the longer side.
+    half_short = 0.5 * shorter
+    if landscape:
+        return half_short / sin_a, half_short / cos_a
+    return half_short / cos_a, half_short / sin_a
+
+def rotate_max_area_new(image, rotated, angle):
+    """Crop ``rotated`` to a centred window sized by ``rotatedRectWithMaxArea``.
+
+    The window size is computed from the width and height of ``image`` and
+    ``angle`` (degrees, converted to radians here).  ``rotated`` must have
+    three dimensions because its shape is unpacked into three values.
+    """
+    crop_w, crop_h = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
+    rot_h, rot_w, _ = rotated.shape
+
+    top = rot_h // 2 - int(crop_h / 2)
+    left = rot_w // 2 - int(crop_w / 2)
+    bottom = top + int(crop_h)
+    right = left + int(crop_w)
+
+    return rotated[top:bottom, left:right]
+
+def rotation_image_new(img, thetha):
+    """Rotate ``img`` by ``thetha`` with ``imutils.rotate`` and crop it with ``rotate_max_area_new``."""
+    return rotate_max_area_new(img, imutils.rotate(img, thetha), thetha)
+
+def rotate_image(img_patch, slope):
+    """Rotate ``img_patch`` by ``slope`` about its integer centre, keeping the original size.
+
+    Uses cubic interpolation and replicates border pixels.
+    """
+    height, width = img_patch.shape[:2]
+    rotation = cv2.getRotationMatrix2D((width // 2, height // 2), slope, 1.0)
+    return cv2.warpAffine(
+        img_patch,
+        rotation,
+        (width, height),
+        flags=cv2.INTER_CUBIC,
+        borderMode=cv2.BORDER_REPLICATE,
+    )
+
+def rotyate_image_different( img, slope):
+    """Rotate ``img`` by ``slope`` about its (non-truncated) centre with ``cv2.warpAffine`` defaults.
+
+    The output has the same number of rows and columns as the input.
+    """
+    rows, cols = img.shape[:2]
+    centre = (cols / 2, rows / 2)
+    matrix = cv2.getRotationMatrix2D(centre, slope, 1)
+    return cv2.warpAffine(img, matrix, (cols, rows))
+
+def rotate_max_area(image, rotated, rotated_textline, rotated_layout, angle):
+    """Crop three rotated arrays to the same centred window.
+
+    The window size comes from ``rotatedRectWithMaxArea`` for the width and
+    height of ``image`` and ``angle`` (degrees); it is centred on ``rotated``,
+    whose shape is unpacked into three values.
+    """
+    crop_w, crop_h = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
+    rot_h, rot_w, _ = rotated.shape
+
+    top = rot_h // 2 - int(crop_h / 2)
+    left = rot_w // 2 - int(crop_w / 2)
+    window = (slice(top, top + int(crop_h)), slice(left, left + int(crop_w)))
+
+    return rotated[window], rotated_textline[window], rotated_layout[window]
+
+def rotation_not_90_func(img, textline, text_regions_p_1, thetha):
+    """Rotate image, textline map and layout by ``thetha`` and crop all three with ``rotate_max_area``."""
+    rotated, rotated_textline, rotated_layout = (
+        imutils.rotate(layer, thetha) for layer in (img, textline, text_regions_p_1)
+    )
+    return rotate_max_area(img, rotated, rotated_textline, rotated_layout, thetha)
+
+def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha):
+    """Rotate image, textline map and both layouts by ``thetha`` and crop them with ``rotate_max_area_full_layout``."""
+    layers = (img, textline, text_regions_p_1, text_regions_p_fully)
+    rotated, rotated_textline, rotated_layout, rotated_layout_full = (
+        imutils.rotate(layer, thetha) for layer in layers
+    )
+    return rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha)
+
+def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle):
+    """Crop four rotated arrays to the same centred window.
+
+    The window size comes from ``rotatedRectWithMaxArea`` for the width and
+    height of ``image`` and ``angle`` (degrees); it is centred on ``rotated``,
+    whose shape is unpacked into three values.
+    """
+    crop_w, crop_h = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
+    rot_h, rot_w, _ = rotated.shape
+
+    top = rot_h // 2 - int(crop_h / 2)
+    left = rot_w // 2 - int(crop_w / 2)
+    window = (slice(top, top + int(crop_h)), slice(left, left + int(crop_w)))
+
+    return (
+        rotated[window],
+        rotated_textline[window],
+        rotated_layout[window],
+        rotated_layout_full[window],
+    )
+
--- a/qurator/eynollah/utils/separate_lines.py
+++ b/qurator/eynollah/utils/separate_lines.py
--- a/qurator/eynollah/utils/xml.py
+++ b/qurator/eynollah/utils/xml.py
@ -0,0 +1,62 @@
+from lxml import etree as ET
+
+# Prefix -> URI mapping handed to lxml as ``nsmap`` when the root element is
+# created; the ``None`` entry aliases the PAGE namespace.
+NAMESPACES = {
+    'page': "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15",
+    'xsi': "http://www.w3.org/2001/XMLSchema-instance",
+}
+NAMESPACES[None] = NAMESPACES['page']
+
+def create_page_xml(imageFilename, height, width):
+    """
+    Create the skeleton of a PAGE-XML document.
+
+    Builds a ``PcGts`` root with a ``Metadata`` child (Creator, Created,
+    LastChange) and a ``Page`` child carrying the image file name, the image
+    size (stringified ``height`` / ``width``) and fixed layout attributes.
+
+    Returns the tuple ``(pcgts, page)``.
+    """
+    pcgts = ET.Element("PcGts", nsmap=NAMESPACES)
+    pcgts.set("{%s}schemaLocation" % NAMESPACES['xsi'], NAMESPACES['page'])
+
+    # NOTE(review): both timestamps are hard-coded constants, not the time of creation.
+    metadata = ET.SubElement(pcgts, "Metadata")
+    for tag, text in (
+            ("Creator", "SBB_QURATOR"),
+            ("Created", "2019-06-17T18:15:12"),
+            ("LastChange", "2019-06-17T18:15:12"),
+    ):
+        ET.SubElement(metadata, tag).text = text
+
+    page = ET.SubElement(pcgts, "Page")
+    for attribute, value in (
+            ("imageFilename", imageFilename),
+            ("imageHeight", str(height)),
+            ("imageWidth", str(width)),
+            ("type", "content"),
+            ("readingDirection", "left-to-right"),
+            ("textLineOrder", "top-to-bottom"),
+    ):
+        page.set(attribute, value)
+
+    return pcgts, page
+
+def add_textequiv(parent, text=''):
+    """
+    Append a ``TextEquiv`` element holding a single ``Unicode`` child to ``parent``.
+
+    ``text`` becomes the text content of the ``Unicode`` element. Returns nothing.
+    """
+    unicode_element = ET.SubElement(ET.SubElement(parent, 'TextEquiv'), 'Unicode')
+    unicode_element.text = text
+
+def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals):
+    """
+    Append a ``ReadingOrder`` / ``OrderedGroup`` to ``page``.
+
+    One ``RegionRefIndexed`` is written per entry of ``order_of_texts`` (its
+    ``regionRef`` is ``id_of_texts[entry]``), followed by one per element of
+    ``found_polygons_marginals``. Marginalia get the id ``'r<index>'``, where
+    ``<index>`` continues the running index of the text regions.
+
+    XXX side-effect: extends id_of_marginalia
+
+    Returns ``id_of_marginalia`` (the same list object that was passed in).
+    """
+    region_order = ET.SubElement(page, 'ReadingOrder')
+    region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
+    region_order_sub.set('id', "ro357564684568544579089")
+    indexer_region = 0
+    for vj in order_of_texts:
+        region_ref = ET.SubElement(region_order_sub, 'RegionRefIndexed')
+        region_ref.set('index', str(indexer_region))
+        region_ref.set('regionRef', id_of_texts[vj])
+        indexer_region += 1
+    for _ in range(len(found_polygons_marginals)):
+        marginal_id = 'r%s' % indexer_region
+        id_of_marginalia.append(marginal_id)
+        region_ref = ET.SubElement(region_order_sub, 'RegionRefIndexed')
+        region_ref.set('index', str(indexer_region))
+        region_ref.set('regionRef', marginal_id)
+        indexer_region += 1
+    return id_of_marginalia
+
--- a/qurator/eynollah/writer.py
+++ b/qurator/eynollah/writer.py
@ -0,0 +1,272 @@
+# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
+from pathlib import Path
+import os.path
+
+from .utils.xml import create_page_xml, add_textequiv, xml_reading_order
+
+from ocrd_utils import getLogger
+from lxml import etree as ET
+import numpy as np
+
+class EynollahXmlWriter():
+    """
+    Build the PAGE-XML result for one image and write it to ``dir_out``.
+
+    ``scale_x``, ``scale_y``, ``height_org`` and ``width_org`` start out as
+    ``None`` and have to be assigned by the caller before coordinates are
+    computed or a document is built.
+    """
+
+    def __init__(self, *, dir_out, image_filename, curved_line):
+        self.logger = getLogger('eynollah.writer')
+        self.dir_out = dir_out
+        self.image_filename = image_filename
+        self.image_filename_stem = Path(Path(image_filename).name).stem
+        self.curved_line = curved_line
+        self.scale_x = None # XXX set outside __init__
+        self.scale_y = None # XXX set outside __init__
+        self.height_org = None # XXX set outside __init__
+        self.width_org = None # XXX set outside __init__
+
+    @staticmethod
+    def _point_xy(point):
+        """Return ``(x, y)`` for a point stored either as ``[x, y]`` or as ``[[x, y]]``."""
+        if len(point) == 2:
+            return point[0], point[1]
+        return point[0][0], point[0][1]
+
+    def calculate_page_coords(self, cont_page):
+        """
+        Serialize the points of ``cont_page[0]`` as a ``points`` attribute value.
+
+        Every coordinate is divided by ``scale_x`` / ``scale_y`` and truncated to
+        int; points are written as ``x,y`` and separated by single spaces.
+        """
+        self.logger.debug('enter calculate_page_coords')
+        points = []
+        for contour in cont_page[0]:
+            x, y = self._point_xy(contour)
+            points.append('%s,%s' % (int(x / self.scale_x), int(y / self.scale_y)))
+        return ' '.join(points)
+
+    def _textline_point(self, point, idx, page_coord, all_boxes, slopes, clamp):
+        """
+        Serialize one text line point as ``x,y``.
+
+        The page offset (``page_coord[2]`` for x, ``page_coord[0]`` for y) is always
+        added. The box offset (``all_boxes[idx][2]`` / ``all_boxes[idx][0]``) is added
+        unless ``curved_line`` is set and ``abs(slopes[idx]) <= 45``.
+        ``slopes`` and ``all_boxes`` are only indexed in the branches that need them.
+        With ``clamp`` set, coordinates of non-curved lines are clamped to ``>= 0``.
+        """
+        if not self.curved_line:
+            with_box = True
+        elif np.abs(slopes[idx]) <= 45:
+            with_box = False
+        elif np.abs(slopes[idx]) > 45:
+            with_box = True
+        else:
+            # Neither comparison holds (e.g. a NaN slope): nothing is written for this point.
+            return ''
+
+        x, y = self._point_xy(point)
+        if with_box:
+            x = int((x + all_boxes[idx][2] + page_coord[2]) / self.scale_x)
+            y = int((y + all_boxes[idx][0] + page_coord[0]) / self.scale_y)
+        else:
+            x = int((x + page_coord[2]) / self.scale_x)
+            y = int((y + page_coord[0]) / self.scale_y)
+        if clamp and not self.curved_line:
+            x, y = max(0, x), max(0, y)
+        return '%s,%s' % (x, y)
+
+    def _serialize_textlines(self, parent, all_polygons, idx, page_coord, all_boxes, slopes, id_indexer_l, clamp):
+        """
+        Append one ``TextLine`` (id, ``Coords``, empty ``TextEquiv``) to ``parent`` for
+        every polygon in ``all_polygons[idx]`` and return the advanced line counter.
+        """
+        for polygon in all_polygons[idx]:
+            textline = ET.SubElement(parent, 'TextLine')
+            textline.set('id', 'l%s' % id_indexer_l)
+            id_indexer_l += 1
+            coord = ET.SubElement(textline, 'Coords')
+            add_textequiv(textline)
+            points_co = ' '.join(
+                self._textline_point(point, idx, page_coord, all_boxes, slopes, clamp)
+                for point in polygon)
+            coord.set('points',points_co)
+        return id_indexer_l
+
+    def serialize_lines_in_marginal(self, marginal, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l):
+        """
+        Append the text lines of marginal ``marginal_idx`` to the element ``marginal``.
+
+        Coordinates are not clamped here. Returns the line counter after the last
+        line that was written.
+        """
+        return self._serialize_textlines(marginal, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l, clamp=False)
+
+    def serialize_lines_in_region(self, textregion, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, id_indexer_l):
+        """
+        Append the text lines of region ``region_idx`` to the element ``textregion``.
+
+        For non-curved lines the coordinates are clamped to ``>= 0``. Returns the
+        line counter after the last line that was written.
+        """
+        self.logger.debug('enter serialize_lines_in_region')
+        return self._serialize_textlines(textregion, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, id_indexer_l, clamp=True)
+
+    def write_pagexml(self, pcgts):
+        """Write ``pcgts`` to ``<dir_out>/<image file name stem>.xml``."""
+        self.logger.info("filename stem: '%s'", self.image_filename_stem)
+        tree = ET.ElementTree(pcgts)
+        tree.write(os.path.join(self.dir_out, self.image_filename_stem) + ".xml")
+
+    def _create_document(self, cont_page):
+        """Create the PAGE skeleton plus its ``Border`` element; return ``(pcgts, page)``."""
+        pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
+        page_print_sub = ET.SubElement(page, "Border")
+        coord_page = ET.SubElement(page_print_sub, "Coords")
+        coord_page.set('points', self.calculate_page_coords(cont_page))
+        return pcgts, page
+
+    def _add_text_regions(self, page, region_type, polygons, textline_polygons, boxes, slopes, page_coord, id_indexer, id_indexer_l):
+        """
+        Append one ``TextRegion`` of type ``region_type`` per polygon, including its
+        text lines and a trailing empty ``TextEquiv``.
+
+        Returns the advanced ``(id_indexer, id_indexer_l)`` counters.
+        """
+        for mm in range(len(polygons)):
+            textregion = ET.SubElement(page, 'TextRegion')
+            textregion.set('id', 'r%s' % id_indexer)
+            id_indexer += 1
+            textregion.set('type', region_type)
+            coord_text = ET.SubElement(textregion, 'Coords')
+            coord_text.set('points', self.calculate_polygon_coords(polygons, mm, page_coord))
+            id_indexer_l = self.serialize_lines_in_region(textregion, textline_polygons, mm, page_coord, boxes, slopes, id_indexer_l)
+            add_textequiv(textregion)
+        return id_indexer, id_indexer_l
+
+    def _add_marginal_regions(self, page, found_polygons_marginals, id_of_marginalia, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes_marginals, page_coord, id_indexer_l, with_textequiv):
+        """
+        Append one ``TextRegion`` of type ``marginalia`` per polygon, using the ids in
+        ``id_of_marginalia``; return the advanced line counter.
+
+        NOTE(review): ``id_of_marginalia`` is only filled by the callers when text
+        regions exist, so marginals without any text region raise IndexError here.
+        """
+        for mm in range(len(found_polygons_marginals)):
+            marginal = ET.SubElement(page, 'TextRegion')
+            marginal.set('id', id_of_marginalia[mm])
+            marginal.set('type', 'marginalia')
+            coord_text = ET.SubElement(marginal, 'Coords')
+            coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
+            id_indexer_l = self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l)
+            if with_textequiv:
+                # Added last, like the TextEquiv of the other text regions.
+                add_textequiv(marginal)
+        return id_indexer_l
+
+    def _add_plain_regions(self, page, tag, polygons, page_coord, id_indexer):
+        """
+        Append one ``tag`` element (id and ``Coords`` only) per polygon and return the
+        advanced region counter.
+        """
+        for mm in range(len(polygons)):
+            region = ET.SubElement(page, tag)
+            region.set('id', 'r%s' % id_indexer)
+            id_indexer += 1
+            coord_text = ET.SubElement(region, 'Coords')
+            coord_text.set('points', self.calculate_polygon_coords(polygons, mm, page_coord))
+        return id_indexer
+
+    def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page):
+        """
+        Build the PAGE document for the simple layout: border, reading order,
+        paragraph regions, marginalia and image regions (in that order).
+
+        The reading order is only written when at least one text region exists.
+        Returns the ``PcGts`` root element.
+        """
+        self.logger.debug('enter build_pagexml_no_full_layout')
+
+        # create the file structure
+        pcgts, page = self._create_document(cont_page)
+
+        id_of_marginalia = []
+        id_indexer = 0
+        id_indexer_l = 0
+        if len(found_polygons_text_region) > 0:
+            id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
+            id_indexer, id_indexer_l = self._add_text_regions(page, 'paragraph', found_polygons_text_region, all_found_texline_polygons, all_box_coord, slopes, page_coord, id_indexer, id_indexer_l)
+
+        id_indexer_l = self._add_marginal_regions(page, found_polygons_marginals, id_of_marginalia, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes_marginals, page_coord, id_indexer_l, with_textequiv=False)
+
+        # Image ids continue after the text regions and the marginalia.
+        id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals)
+        self._add_plain_regions(page, 'ImageRegion', found_polygons_text_region_img, page_coord, id_indexer)
+
+        return pcgts
+
+    def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page):
+        """
+        Build the PAGE document for the full layout: border, reading order,
+        paragraph, header and drop-capital regions, marginalia, image regions and
+        table regions (in that order).
+
+        The reading order is only written when at least one paragraph region
+        exists. Returns the ``PcGts`` root element.
+        """
+        self.logger.debug('enter build_pagexml_full_layout')
+
+        # create the file structure
+        pcgts, page = self._create_document(cont_page)
+
+        id_indexer = 0
+        id_indexer_l = 0
+        id_of_marginalia = []
+
+        if len(found_polygons_text_region) > 0:
+            id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
+            id_indexer, id_indexer_l = self._add_text_regions(page, 'paragraph', found_polygons_text_region, all_found_texline_polygons, all_box_coord, slopes, page_coord, id_indexer, id_indexer_l)
+
+        self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
+        # NOTE(review): headers are serialized with ``slopes``, the same list used for the paragraphs.
+        id_indexer, id_indexer_l = self._add_text_regions(page, 'header', found_polygons_text_region_h, all_found_texline_polygons_h, all_box_coord_h, slopes, page_coord, id_indexer, id_indexer_l)
+
+        if len(found_polygons_drop_capitals) > 0:
+            # Drop-capital ids start after the paragraph, header and marginalia counts.
+            id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals)
+            for mm in range(len(found_polygons_drop_capitals)):
+                textregion = ET.SubElement(page, 'TextRegion')
+                # No leading space: same 'r<N>' form as every other region id.
+                textregion.set('id', 'r%s' % id_indexer)
+                id_indexer += 1
+                textregion.set('type', 'drop-capital')
+                coord_text = ET.SubElement(textregion, 'Coords')
+                coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
+                add_textequiv(textregion)
+
+        # The TextEquiv belongs to the marginal itself, not to the region created before it.
+        id_indexer_l = self._add_marginal_regions(page, found_polygons_marginals, id_of_marginalia, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes_marginals, page_coord, id_indexer_l, with_textequiv=True)
+
+        id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals)
+        id_indexer = self._add_plain_regions(page, 'ImageRegion', found_polygons_text_region_img, page_coord, id_indexer)
+        self._add_plain_regions(page, 'TableRegion', found_polygons_tables, page_coord, id_indexer)
+
+        return pcgts
+
+    def calculate_polygon_coords(self, contour_list, i, page_coord):
+        """
+        Serialize the points of ``contour_list[i]`` as a ``points`` attribute value.
+
+        ``page_coord[2]`` is added to x and ``page_coord[0]`` to y before dividing
+        by ``scale_x`` / ``scale_y`` and truncating to int; points are written as
+        ``x,y`` and separated by single spaces.
+        """
+        self.logger.debug('enter calculate_polygon_coords')
+        coords = []
+        for point in contour_list[i]:
+            x, y = self._point_xy(point)
+            coords.append('%s,%s' % (
+                int((x + page_coord[2]) / self.scale_x),
+                int((y + page_coord[0]) / self.scale_y)))
+        return ' '.join(coords)
+
				`@ -0,0 +1 @@`
				`__import__("pkg_resources").declare_namespace(__name__)`