sbb_pixelwise_segmentation/gt_gen_utils.py

import click
import sys
import os
import numpy as np
import warnings
import xml.etree.ElementTree as ET
from tqdm import tqdm
import cv2
from shapely import geometry
from pathlib import Path


KERNEL = np.ones((5, 5), np.uint8)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

def get_content_of_dir(dir_in):
    """
    Listing all ground truth page xml files. All files are needed to have xml format.
    """

    gt_all=os.listdir(dir_in)
    gt_list=[file for file in gt_all if file.split('.')[ len(file.split('.'))-1 ]=='xml' ]
    return gt_list

def return_parent_contours(contours, hierarchy):
    contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1]
    return contours_parent
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
    found_polygons_early = list()

    jv = 0
    for c in contours:
        if len(np.shape(c)) == 3:
            c = c[0]
        elif len(np.shape(c)) == 2:
            pass
        #c = c[0]
        if len(c) < 3:  # A polygon cannot have less than 3 points
            continue

        c_e = [point for point in c]
        polygon = geometry.Polygon(c_e)
        # area = cv2.contourArea(c)
        area = polygon.area
        # Check that polygon has area greater than minimal area
        if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]):  # and hierarchy[0][jv][3]==-1 :
            found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
        jv += 1
    return found_polygons_early

def filter_contours_area_of_image(image, contours, order_index, max_area, min_area):
    found_polygons_early = list()
    order_index_filtered = list()
    #jv = 0
    for jv, c in enumerate(contours):
        if len(np.shape(c)) == 3:
            c = c[0]
        elif len(np.shape(c)) == 2:
            pass
        if len(c) < 3:  # A polygon cannot have less than 3 points
            continue
        c_e = [point for point in c]
        polygon = geometry.Polygon(c_e)
        area = polygon.area
        if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]):  # and hierarchy[0][jv][3]==-1 :
            found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint))
            order_index_filtered.append(order_index[jv])
        #jv += 1
    return found_polygons_early, order_index_filtered

def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):

    # pixels of images are identified by 5
    if len(region_pre_p.shape) == 3:
        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
    else:
        cnts_images = (region_pre_p[:, :] == pixel) * 1
    cnts_images = cnts_images.astype(np.uint8)
    cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

    contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contours_imgs = return_parent_contours(contours_imgs, hierarchy)
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=min_area)

    return contours_imgs
def update_region_contours(co_text, img_boundary, erosion_rate, dilation_rate, y_len, x_len):
    co_text_eroded = []
    for con in co_text:
        img_boundary_in = np.zeros( (y_len,x_len) )
        img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1))

        #img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=7)#asiatica
        if erosion_rate > 0:
            img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=erosion_rate)

        pixel = 1
        min_size = 0
        con_eroded = return_contours_of_interested_region(img_boundary_in,pixel, min_size )

        try:
            co_text_eroded.append(con_eroded[0])
        except:
            co_text_eroded.append(con)


        img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=dilation_rate)
        #img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=5)

        boundary = img_boundary_in_dilated[:,:] - img_boundary_in[:,:]

        img_boundary[:,:][boundary[:,:]==1] =1
    return co_text_eroded, img_boundary
def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images):
    """
    Reading the page xml files and write the ground truth images into given output directory.
    """
    ## to do: add footnote to text regions

    if dir_images:
        ls_org_imgs = os.listdir(dir_images)
        ls_org_imgs_stem = [item.split('.')[0] for item in ls_org_imgs]
    for index in tqdm(range(len(gt_list))):
        #try:
        tree1 = ET.parse(dir_in+'/'+gt_list[index], parser = ET.XMLParser(encoding = 'iso-8859-5'))
        root1=tree1.getroot()
        alltags=[elem.tag for elem in root1.iter()]
        link=alltags[0].split('}')[0]+'}'


        for jj in root1.iter(link+'Page'):
            y_len=int(jj.attrib['imageHeight'])
            x_len=int(jj.attrib['imageWidth'])

        if 'columns_width' in list(config_params.keys()):
            columns_width_dict = config_params['columns_width']
            metadata_element = root1.find(link+'Metadata')
            comment_is_sub_element = False
            for child in metadata_element:
                tag2 = child.tag
                if tag2.endswith('}Comments') or tag2.endswith('}comments'):
                    text_comments = child.text
                    num_col = int(text_comments.split('num_col')[1])
                    comment_is_sub_element = True
            if not comment_is_sub_element:
                num_col = None

            if num_col:
                x_new = columns_width_dict[str(num_col)]
                y_new = int ( x_new * (y_len / float(x_len)) )

        if printspace or "printspace_as_class_in_layout" in list(config_params.keys()):
            region_tags = np.unique([x for x in alltags if x.endswith('PrintSpace') or x.endswith('Border')])
            co_use_case = []

            for tag in region_tags:
                tag_endings = ['}PrintSpace','}Border']

                if tag.endswith(tag_endings[0]) or tag.endswith(tag_endings[1]):
                    for nn in root1.iter(tag):
                        c_t_in = []
                        sumi = 0
                        for vv in nn.iter():
                            # check the format of coords
                            if vv.tag == link + 'Coords':
                                coords = bool(vv.attrib)
                                if coords:
                                    p_h = vv.attrib['points'].split(' ')
                                    c_t_in.append(
                                        np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h]))
                                    break
                                else:
                                    pass

                            if vv.tag == link + 'Point':
                                c_t_in.append([int(float(vv.attrib['x'])), int(float(vv.attrib['y']))])
                                sumi += 1
                            elif vv.tag != link + 'Point' and sumi >= 1:
                                break
                        co_use_case.append(np.array(c_t_in))

            img = np.zeros((y_len, x_len, 3))

            img_poly = cv2.fillPoly(img, pts=co_use_case, color=(1, 1, 1))

            img_poly = img_poly.astype(np.uint8)

            imgray = cv2.cvtColor(img_poly, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(imgray, 0, 255, 0)

            contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

            cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])

            cnt = contours[np.argmax(cnt_size)]

            x, y, w, h = cv2.boundingRect(cnt)
            bb_xywh = [x, y, w, h]


        if config_file and (config_params['use_case']=='textline' or config_params['use_case']=='word' or config_params['use_case']=='glyph' or config_params['use_case']=='printspace'):
            keys = list(config_params.keys())
            if "artificial_class_label" in keys:
                artificial_class_rgb_color = (255,255,0)
                artificial_class_label = config_params['artificial_class_label']

            textline_rgb_color = (255, 0, 0)

            if config_params['use_case']=='textline':
                region_tags = np.unique([x for x in alltags if x.endswith('TextLine')])
            elif config_params['use_case']=='word':
                region_tags = np.unique([x for x in alltags if x.endswith('Word')])
            elif config_params['use_case']=='glyph':
                region_tags = np.unique([x for x in alltags if x.endswith('Glyph')])
            elif config_params['use_case']=='printspace':
                region_tags = np.unique([x for x in alltags if x.endswith('PrintSpace')])

            co_use_case = []

            for tag in region_tags:
                if config_params['use_case']=='textline':
                    tag_endings = ['}TextLine','}textline']
                elif config_params['use_case']=='word':
                    tag_endings = ['}Word','}word']
                elif config_params['use_case']=='glyph':
                    tag_endings = ['}Glyph','}glyph']
                elif config_params['use_case']=='printspace':
                    tag_endings = ['}PrintSpace','}printspace']

                if tag.endswith(tag_endings[0]) or tag.endswith(tag_endings[1]):
                    for nn in root1.iter(tag):
                        c_t_in = []
                        sumi = 0
                        for vv in nn.iter():
                            # check the format of coords
                            if vv.tag == link + 'Coords':
                                coords = bool(vv.attrib)
                                if coords:
                                    p_h = vv.attrib['points'].split(' ')
                                    c_t_in.append(
                                        np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h]))
                                    break
                                else:
                                    pass

                            if vv.tag == link + 'Point':
                                c_t_in.append([int(float(vv.attrib['x'])), int(float(vv.attrib['y']))])
                                sumi += 1
                            elif vv.tag != link + 'Point' and sumi >= 1:
                                break
                        co_use_case.append(np.array(c_t_in))


            if "artificial_class_label" in keys:
                img_boundary = np.zeros((y_len, x_len))
                erosion_rate = 1
                dilation_rate = 3
                co_use_case, img_boundary = update_region_contours(co_use_case, img_boundary, erosion_rate, dilation_rate, y_len, x_len )


            img = np.zeros((y_len, x_len, 3))
            if output_type == '2d':
                img_poly = cv2.fillPoly(img, pts=co_use_case, color=(1, 1, 1))
                if "artificial_class_label" in keys:
                    img_poly[:,:][img_boundary[:,:]==1] = artificial_class_label
            elif output_type == '3d':
                img_poly = cv2.fillPoly(img, pts=co_use_case, color=textline_rgb_color)
                if "artificial_class_label" in keys:
                    img_poly[:,:,0][img_boundary[:,:]==1] = artificial_class_rgb_color[0]
                    img_poly[:,:,1][img_boundary[:,:]==1] = artificial_class_rgb_color[1]
                    img_poly[:,:,2][img_boundary[:,:]==1] = artificial_class_rgb_color[2]


            if printspace and config_params['use_case']!='printspace':
                img_poly = img_poly[bb_xywh[1]:bb_xywh[1]+bb_xywh[3], bb_xywh[0]:bb_xywh[0]+bb_xywh[2], :]


            if 'columns_width' in list(config_params.keys()) and num_col and config_params['use_case']!='printspace':
                img_poly = resize_image(img_poly, y_new, x_new)

            try:
                xml_file_stem = gt_list[index].split('-')[1].split('.')[0]
                cv2.imwrite(os.path.join(output_dir, xml_file_stem + '.png'), img_poly)
            except:
                xml_file_stem = gt_list[index].split('.')[0]
                cv2.imwrite(os.path.join(output_dir, xml_file_stem + '.png'), img_poly)

            if dir_images:
                org_image_name = ls_org_imgs[ls_org_imgs_stem.index(xml_file_stem)]
                img_org = cv2.imread(os.path.join(dir_images, org_image_name))

                if printspace and config_params['use_case']!='printspace':
                    img_org = img_org[bb_xywh[1]:bb_xywh[1]+bb_xywh[3], bb_xywh[0]:bb_xywh[0]+bb_xywh[2], :]

                if 'columns_width' in list(config_params.keys()) and num_col and config_params['use_case']!='printspace':
                    img_org = resize_image(img_org, y_new, x_new)

                cv2.imwrite(os.path.join(dir_out_images, org_image_name), img_org)


        if config_file and config_params['use_case']=='layout':
            keys = list(config_params.keys())

            if "artificial_class_on_boundary" in keys:
                elements_with_artificial_class = list(config_params['artificial_class_on_boundary'])
                artificial_class_rgb_color = (255,255,0)
                artificial_class_label = config_params['artificial_class_label']
            #values = config_params.values()

            if "printspace_as_class_in_layout" in list(config_params.keys()):
                printspace_class_rgb_color = (125,125,255)
                printspace_class_label = config_params['printspace_as_class_in_layout']

            if 'textregions' in keys:
                types_text_dict = config_params['textregions']
                types_text = list(types_text_dict.keys())
                types_text_label = list(types_text_dict.values())
            if 'graphicregions' in keys:
                types_graphic_dict = config_params['graphicregions']
                types_graphic = list(types_graphic_dict.keys())
                types_graphic_label = list(types_graphic_dict.values())


            labels_rgb_color = [ (0,0,0), (255,0,0), (255,125,0), (255,0,125), (125,255,125), (125,125,0), (0,125,255), (0,125,0), (125,125,125), (255,0,255), (125,0,125), (0,255,0),(0,0,255), (0,255,255), (255,125,125),  (0,125,125), (0,255,125), (255,125,255), (125,255,0)]


            region_tags=np.unique([x for x in alltags if x.endswith('Region')])
            co_text = {'drop-capital':[], "footnote":[], "footnote-continued":[], "heading":[], "signature-mark":[], "header":[], "catch-word":[], "page-number":[], "marginalia":[], "paragraph":[]}
            all_defined_textregion_types = list(co_text.keys())
            co_graphic = {"handwritten-annotation":[], "decoration":[], "stamp":[], "signature":[]}
            all_defined_graphic_types = list(co_graphic.keys())
            co_sep=[]
            co_img=[]
            co_table=[]
            co_noise=[]

            for tag in region_tags:
                if 'textregions' in keys:
                    if tag.endswith('}TextRegion') or tag.endswith('}Textregion'):
                        for nn in root1.iter(tag):
                            c_t_in = {'drop-capital':[], "footnote":[], "footnote-continued":[], "heading":[], "signature-mark":[], "header":[], "catch-word":[], "page-number":[], "marginalia":[], "paragraph":[]}
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':

                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')

                                        if "rest_as_paragraph" in types_text:
                                            types_text_without_paragraph = [element for element in types_text if element!='rest_as_paragraph' and element!='paragraph']
                                            if len(types_text_without_paragraph) == 0:
                                                if "type" in nn.attrib:
                                                    c_t_in['paragraph'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                            elif len(types_text_without_paragraph) >= 1:
                                                if "type" in nn.attrib:
                                                    if nn.attrib['type'] in types_text_without_paragraph:
                                                        c_t_in[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                                    else:
                                                        c_t_in['paragraph'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                                        else:
                                            if "type" in nn.attrib:
                                                if nn.attrib['type'] in all_defined_textregion_types:
                                                    c_t_in[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                                        break
                                    else:
                                        pass


                                if vv.tag==link+'Point':
                                    if "rest_as_paragraph" in types_text:
                                        types_text_without_paragraph = [element for element in types_text if element!='rest_as_paragraph' and element!='paragraph']
                                        if len(types_text_without_paragraph) == 0:
                                            if "type" in nn.attrib:
                                                c_t_in['paragraph'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                sumi+=1
                                        elif len(types_text_without_paragraph) >= 1:
                                            if "type" in nn.attrib:
                                                if nn.attrib['type'] in types_text_without_paragraph:
                                                    c_t_in[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                    sumi+=1
                                                else:
                                                    c_t_in['paragraph'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                    sumi+=1

                                    else:
                                        if "type" in nn.attrib:
                                            if nn.attrib['type'] in all_defined_textregion_types:
                                                c_t_in[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                sumi+=1


                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break

                            for element_text in list(c_t_in.keys()):
                                if len(c_t_in[element_text])>0:
                                    co_text[element_text].append(np.array(c_t_in[element_text]))

                if 'graphicregions' in keys:
                    if tag.endswith('}GraphicRegion') or tag.endswith('}graphicregion'):
                        #print('sth')
                        for nn in root1.iter(tag):
                            c_t_in_graphic = {"handwritten-annotation":[], "decoration":[], "stamp":[], "signature":[]}
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')

                                        if "rest_as_decoration" in types_graphic:
                                            types_graphic_without_decoration = [element for element in types_graphic if element!='rest_as_decoration' and element!='decoration']
                                            if len(types_graphic_without_decoration) == 0:
                                                if "type" in nn.attrib:
                                                    c_t_in_graphic['decoration'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                            elif len(types_graphic_without_decoration) >= 1:
                                                if "type" in nn.attrib:
                                                    if nn.attrib['type'] in types_graphic_without_decoration:
                                                        c_t_in_graphic[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                                    else:
                                                        c_t_in_graphic['decoration'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                                        else:
                                            if "type" in nn.attrib:
                                                if nn.attrib['type'] in all_defined_graphic_types:
                                                    c_t_in_graphic[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                                        break
                                    else:
                                        pass


                                if vv.tag==link+'Point':
                                    if "rest_as_decoration" in types_graphic:
                                        types_graphic_without_decoration = [element for element in types_graphic if element!='rest_as_decoration' and element!='decoration']
                                        if len(types_graphic_without_decoration) == 0:
                                            if "type" in nn.attrib:
                                                c_t_in_graphic['decoration'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                sumi+=1
                                        elif len(types_graphic_without_decoration) >= 1:
                                            if "type" in nn.attrib:
                                                if nn.attrib['type'] in types_graphic_without_decoration:
                                                    c_t_in_graphic[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                    sumi+=1
                                                else:
                                                    c_t_in_graphic['decoration'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                    sumi+=1

                                    else:
                                        if "type" in nn.attrib:
                                            if nn.attrib['type'] in all_defined_graphic_types:
                                                c_t_in_graphic[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                sumi+=1

                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break

                            for element_graphic in list(c_t_in_graphic.keys()):
                                if len(c_t_in_graphic[element_graphic])>0:
                                    co_graphic[element_graphic].append(np.array(c_t_in_graphic[element_graphic]))


                if 'imageregion' in keys:
                    if tag.endswith('}ImageRegion') or tag.endswith('}imageregion'):
                        for nn in root1.iter(tag):
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                        break
                                    else:
                                        pass


                                if vv.tag==link+'Point':
                                    c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                                    sumi+=1

                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                            co_img.append(np.array(c_t_in))


                if 'separatorregion' in keys:
                    if tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'):
                        for nn in root1.iter(tag):
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                        break
                                    else:
                                        pass


                                if vv.tag==link+'Point':
                                    c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                                    sumi+=1

                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                            co_sep.append(np.array(c_t_in))


                if 'tableregion' in keys:
                    if tag.endswith('}TableRegion') or tag.endswith('}tableregion'):
                        #print('sth')
                        for nn in root1.iter(tag):
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                        break
                                    else:
                                        pass


                                if vv.tag==link+'Point':
                                    c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                                    sumi+=1
                                #print(vv.tag,'in')
                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                            co_table.append(np.array(c_t_in))

                if 'noiseregion' in keys:
                    if tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'):
                        #print('sth')
                        for nn in root1.iter(tag):
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                        break
                                    else:
                                        pass


                                if vv.tag==link+'Point':
                                    c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                                    sumi+=1
                                #print(vv.tag,'in')
                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                            co_noise.append(np.array(c_t_in))

            if "artificial_class_on_boundary" in keys:
                img_boundary = np.zeros( (y_len,x_len) )
                if "paragraph" in elements_with_artificial_class:
                    erosion_rate = 2
                    dilation_rate = 4
                    co_text['paragraph'], img_boundary = update_region_contours(co_text['paragraph'], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "drop-capital" in elements_with_artificial_class:
                    erosion_rate = 1
                    dilation_rate = 3
                    co_text["drop-capital"], img_boundary = update_region_contours(co_text["drop-capital"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "catch-word" in elements_with_artificial_class:
                    erosion_rate = 0
                    dilation_rate = 3#4
                    co_text["catch-word"], img_boundary = update_region_contours(co_text["catch-word"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "page-number" in elements_with_artificial_class:
                    erosion_rate = 0
                    dilation_rate = 3#4
                    co_text["page-number"], img_boundary = update_region_contours(co_text["page-number"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "header" in elements_with_artificial_class:
                    erosion_rate = 1
                    dilation_rate = 4
                    co_text["header"], img_boundary = update_region_contours(co_text["header"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "heading" in elements_with_artificial_class:
                    erosion_rate = 1
                    dilation_rate = 4
                    co_text["heading"], img_boundary = update_region_contours(co_text["heading"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "signature-mark" in elements_with_artificial_class:
                    erosion_rate = 1
                    dilation_rate = 4
                    co_text["signature-mark"], img_boundary = update_region_contours(co_text["signature-mark"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "marginalia" in elements_with_artificial_class:
                    erosion_rate = 2
                    dilation_rate = 4
                    co_text["marginalia"], img_boundary = update_region_contours(co_text["marginalia"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "footnote" in elements_with_artificial_class:
                    erosion_rate = 0#2
                    dilation_rate = 2#4
                    co_text["footnote"], img_boundary = update_region_contours(co_text["footnote"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "footnote-continued" in elements_with_artificial_class:
                    erosion_rate = 0#2
                    dilation_rate = 2#4
                    co_text["footnote-continued"], img_boundary = update_region_contours(co_text["footnote-continued"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )


            img = np.zeros( (y_len,x_len,3) )

            if output_type == '3d':
                if 'graphicregions' in keys:
                    if 'rest_as_decoration' in types_graphic:
                        types_graphic[types_graphic=='rest_as_decoration'] = 'decoration'
                        for element_graphic in types_graphic:
                            if element_graphic == 'decoration':
                                color_label = labels_rgb_color[ config_params['graphicregions']['rest_as_decoration']]
                            else:
                                color_label = labels_rgb_color[ config_params['graphicregions'][element_graphic]]
                            img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
                    else:
                        for element_graphic in types_graphic:
                            color_label = labels_rgb_color[ config_params['graphicregions'][element_graphic]]
                            img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)


                if 'imageregion' in keys:
                    img_poly=cv2.fillPoly(img, pts =co_img, color=labels_rgb_color[ config_params['imageregion']])
                if 'tableregion' in keys:
                    img_poly=cv2.fillPoly(img, pts =co_table, color=labels_rgb_color[ config_params['tableregion']])
                if 'noiseregion' in keys:
                    img_poly=cv2.fillPoly(img, pts =co_noise, color=labels_rgb_color[ config_params['noiseregion']])

                if 'textregions' in keys:
                    if 'rest_as_paragraph' in types_text:
                        types_text = ['paragraph'if ttind=='rest_as_paragraph' else ttind for ttind in types_text]
                        for element_text in types_text:
                            if element_text == 'paragraph':
                                color_label = labels_rgb_color[ config_params['textregions']['rest_as_paragraph']]
                            else:
                                color_label = labels_rgb_color[ config_params['textregions'][element_text]]
                            img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
                    else:
                        for element_text in types_text:
                            color_label = labels_rgb_color[ config_params['textregions'][element_text]]
                            img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)


                if "artificial_class_on_boundary" in keys:
                    img_poly[:,:,0][img_boundary[:,:]==1] = artificial_class_rgb_color[0]
                    img_poly[:,:,1][img_boundary[:,:]==1] = artificial_class_rgb_color[1]
                    img_poly[:,:,2][img_boundary[:,:]==1] = artificial_class_rgb_color[2]

                if 'separatorregion' in keys:
                    img_poly=cv2.fillPoly(img, pts =co_sep, color=labels_rgb_color[ config_params['separatorregion']])


                if "printspace_as_class_in_layout" in list(config_params.keys()):
                    printspace_mask = np.zeros((img_poly.shape[0], img_poly.shape[1]))
                    printspace_mask[bb_xywh[1]:bb_xywh[1]+bb_xywh[3], bb_xywh[0]:bb_xywh[0]+bb_xywh[2]] = 1

                    img_poly[:,:,0][printspace_mask[:,:] == 0] = printspace_class_rgb_color[0]
                    img_poly[:,:,1][printspace_mask[:,:] == 0] = printspace_class_rgb_color[1]
                    img_poly[:,:,2][printspace_mask[:,:] == 0] = printspace_class_rgb_color[2]


            elif output_type == '2d':
                if 'graphicregions' in keys:
                    if 'rest_as_decoration' in types_graphic:
                        types_graphic[types_graphic=='rest_as_decoration'] = 'decoration'
                        for element_graphic in types_graphic:
                            if element_graphic == 'decoration':
                                color_label = config_params['graphicregions']['rest_as_decoration']
                            else:
                                color_label = config_params['graphicregions'][element_graphic]
                            img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
                    else:
                        for element_graphic in types_graphic:
                            color_label = config_params['graphicregions'][element_graphic]
                            img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)


                if 'imageregion' in keys:
                    color_label = config_params['imageregion']
                    img_poly=cv2.fillPoly(img, pts =co_img, color=(color_label,color_label,color_label))
                if 'tableregion' in keys:
                    color_label = config_params['tableregion']
                    img_poly=cv2.fillPoly(img, pts =co_table, color=(color_label,color_label,color_label))
                if 'noiseregion' in keys:
                    color_label = config_params['noiseregion']
                    img_poly=cv2.fillPoly(img, pts =co_noise, color=(color_label,color_label,color_label))

                if 'textregions' in keys:
                    if 'rest_as_paragraph' in types_text:
                        types_text = ['paragraph'if ttind=='rest_as_paragraph' else ttind for ttind in types_text]
                        for element_text in types_text:
                            if element_text == 'paragraph':
                                color_label = config_params['textregions']['rest_as_paragraph']
                            else:
                                color_label = config_params['textregions'][element_text]
                            img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
                    else:
                        for element_text in types_text:
                            color_label = config_params['textregions'][element_text]
                            img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)

                if "artificial_class_on_boundary" in keys:
                    img_poly[:,:][img_boundary[:,:]==1] = artificial_class_label

                if 'separatorregion' in keys:
                    color_label = config_params['separatorregion']
                    img_poly=cv2.fillPoly(img, pts =co_sep, color=(color_label,color_label,color_label))

                if "printspace_as_class_in_layout" in list(config_params.keys()):
                    printspace_mask = np.zeros((img_poly.shape[0], img_poly.shape[1]))
                    printspace_mask[bb_xywh[1]:bb_xywh[1]+bb_xywh[3], bb_xywh[0]:bb_xywh[0]+bb_xywh[2]] = 1

                    img_poly[:,:,0][printspace_mask[:,:] == 0] = printspace_class_label
                    img_poly[:,:,1][printspace_mask[:,:] == 0] = printspace_class_label
                    img_poly[:,:,2][printspace_mask[:,:] == 0] = printspace_class_label


            if printspace:
                img_poly = img_poly[bb_xywh[1]:bb_xywh[1]+bb_xywh[3], bb_xywh[0]:bb_xywh[0]+bb_xywh[2], :]

            if 'columns_width' in list(config_params.keys()) and num_col:
                img_poly = resize_image(img_poly, y_new, x_new)

            try:
                xml_file_stem = gt_list[index].split('-')[1].split('.')[0]
                cv2.imwrite(os.path.join(output_dir, xml_file_stem + '.png'), img_poly)
            except:
                xml_file_stem = gt_list[index].split('.')[0]
                cv2.imwrite(os.path.join(output_dir, xml_file_stem + '.png'), img_poly)


            if dir_images:
                org_image_name = ls_org_imgs[ls_org_imgs_stem.index(xml_file_stem)]
                img_org = cv2.imread(os.path.join(dir_images, org_image_name))

                if printspace:
                    img_org = img_org[bb_xywh[1]:bb_xywh[1]+bb_xywh[3], bb_xywh[0]:bb_xywh[0]+bb_xywh[2], :]

                if 'columns_width' in list(config_params.keys()) and num_col:
                    img_org = resize_image(img_org, y_new, x_new)

                cv2.imwrite(os.path.join(dir_out_images, org_image_name), img_org)


def find_new_features_of_contours(contours_main):

    areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
    M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
    cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    try:
        x_min_main = np.array([np.min(contours_main[j][0][:, 0]) for j in range(len(contours_main))])

        argmin_x_main = np.array([np.argmin(contours_main[j][0][:, 0]) for j in range(len(contours_main))])

        x_min_from_argmin = np.array([contours_main[j][0][argmin_x_main[j], 0] for j in range(len(contours_main))])
        y_corr_x_min_from_argmin = np.array([contours_main[j][0][argmin_x_main[j], 1] for j in range(len(contours_main))])

        x_max_main = np.array([np.max(contours_main[j][0][:, 0]) for j in range(len(contours_main))])

        y_min_main = np.array([np.min(contours_main[j][0][:, 1]) for j in range(len(contours_main))])
        y_max_main = np.array([np.max(contours_main[j][0][:, 1]) for j in range(len(contours_main))])
    except:
        x_min_main = np.array([np.min(contours_main[j][:, 0]) for j in range(len(contours_main))])

        argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) for j in range(len(contours_main))])

        x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] for j in range(len(contours_main))])
        y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] for j in range(len(contours_main))])

        x_max_main = np.array([np.max(contours_main[j][:, 0]) for j in range(len(contours_main))])

        y_min_main = np.array([np.min(contours_main[j][:, 1]) for j in range(len(contours_main))])
        y_max_main = np.array([np.max(contours_main[j][:, 1]) for j in range(len(contours_main))])

    return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
def read_xml(xml_file):
    file_name = Path(xml_file).stem
    tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding = 'iso-8859-5'))
    root1=tree1.getroot()
    alltags=[elem.tag for elem in root1.iter()]
    link=alltags[0].split('}')[0]+'}'

    index_tot_regions = []
    tot_region_ref = []

    for jj in root1.iter(link+'Page'):
        y_len=int(jj.attrib['imageHeight'])
        x_len=int(jj.attrib['imageWidth'])

    for jj in root1.iter(link+'RegionRefIndexed'):
        index_tot_regions.append(jj.attrib['index'])
        tot_region_ref.append(jj.attrib['regionRef'])

    if (link+'PrintSpace' in alltags) or  (link+'Border' in alltags):
        co_printspace = []
        if link+'PrintSpace' in alltags:
            region_tags_printspace = np.unique([x for x in alltags if x.endswith('PrintSpace')])
        elif link+'Border' in alltags:
            region_tags_printspace = np.unique([x for x in alltags if x.endswith('Border')])

        for tag in region_tags_printspace:
            if link+'PrintSpace' in alltags:
                tag_endings_printspace = ['}PrintSpace','}printspace']
            elif link+'Border' in alltags:
                tag_endings_printspace = ['}Border','}border']

            if tag.endswith(tag_endings_printspace[0]) or tag.endswith(tag_endings_printspace[1]):
                for nn in root1.iter(tag):
                    c_t_in = []
                    sumi = 0
                    for vv in nn.iter():
                        # check the format of coords
                        if vv.tag == link + 'Coords':
                            coords = bool(vv.attrib)
                            if coords:
                                p_h = vv.attrib['points'].split(' ')
                                c_t_in.append(
                                    np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h]))
                                break
                            else:
                                pass

                        if vv.tag == link + 'Point':
                            c_t_in.append([int(float(vv.attrib['x'])), int(float(vv.attrib['y']))])
                            sumi += 1
                        elif vv.tag != link + 'Point' and sumi >= 1:
                            break
                    co_printspace.append(np.array(c_t_in))
        img_printspace = np.zeros( (y_len,x_len,3) )
        img_printspace=cv2.fillPoly(img_printspace, pts =co_printspace, color=(1,1,1))
        img_printspace = img_printspace.astype(np.uint8)

        imgray = cv2.cvtColor(img_printspace, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(imgray, 0, 255, 0)
        contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
        cnt = contours[np.argmax(cnt_size)]
        x, y, w, h = cv2.boundingRect(cnt)

        bb_coord_printspace = [x, y, w, h]

    else:
        bb_coord_printspace = None


    region_tags=np.unique([x for x in alltags if x.endswith('Region')])
    co_text_paragraph=[]
    co_text_drop=[]
    co_text_heading=[]
    co_text_header=[]
    co_text_marginalia=[]
    co_text_catch=[]
    co_text_page_number=[]
    co_text_signature_mark=[]
    co_sep=[]
    co_img=[]
    co_table=[]
    co_graphic=[]
    co_graphic_text_annotation=[]
    co_graphic_decoration=[]
    co_noise=[]

    co_text_paragraph_text=[]
    co_text_drop_text=[]
    co_text_heading_text=[]
    co_text_header_text=[]
    co_text_marginalia_text=[]
    co_text_catch_text=[]
    co_text_page_number_text=[]
    co_text_signature_mark_text=[]
    co_sep_text=[]
    co_img_text=[]
    co_table_text=[]
    co_graphic_text=[]
    co_graphic_text_annotation_text=[]
    co_graphic_decoration_text=[]
    co_noise_text=[]

    id_paragraph = []
    id_header = []
    id_heading = []
    id_marginalia = []

    for tag in region_tags:
        if tag.endswith('}TextRegion') or tag.endswith('}Textregion'):
            for nn in root1.iter(tag):
                for child2 in nn:
                    tag2 = child2.tag
                    if tag2.endswith('}TextEquiv') or tag2.endswith('}TextEquiv'):
                        for childtext2 in child2:
                            if childtext2.tag.endswith('}Unicode') or childtext2.tag.endswith('}Unicode'):
                                if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
                                    co_text_drop_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='heading':
                                    co_text_heading_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='signature-mark':
                                    co_text_signature_mark_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='header':
                                    co_text_header_text.append(childtext2.text)
                                ###elif "type" in nn.attrib and nn.attrib['type']=='catch-word':
                                    ###co_text_catch_text.append(childtext2.text)
                                ###elif "type" in nn.attrib and nn.attrib['type']=='page-number':
                                    ###co_text_page_number_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='marginalia':
                                    co_text_marginalia_text.append(childtext2.text)
                                else:
                                    co_text_paragraph_text.append(childtext2.text)
                c_t_in_drop=[]
                c_t_in_paragraph=[]
                c_t_in_heading=[]
                c_t_in_header=[]
                c_t_in_page_number=[]
                c_t_in_signature_mark=[]
                c_t_in_catch=[]
                c_t_in_marginalia=[]


                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':

                        coords=bool(vv.attrib)
                        if coords:
                            #print('birda1')
                            p_h=vv.attrib['points'].split(' ')


                            if "type" in nn.attrib and nn.attrib['type']=='drop-capital':

                                c_t_in_drop.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                            elif "type" in nn.attrib and nn.attrib['type']=='heading':
                                ##id_heading.append(nn.attrib['id'])
                                c_t_in_heading.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )


                            elif "type" in nn.attrib and nn.attrib['type']=='signature-mark':

                                c_t_in_signature_mark.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                #print(c_t_in_paragraph)
                            elif "type" in nn.attrib and nn.attrib['type']=='header':
                                #id_header.append(nn.attrib['id'])
                                c_t_in_header.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )


                            ###elif "type" in nn.attrib and nn.attrib['type']=='catch-word':
                                ###c_t_in_catch.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )


                            ###elif "type" in nn.attrib and nn.attrib['type']=='page-number':

                                ###c_t_in_page_number.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                            elif "type" in nn.attrib and nn.attrib['type']=='marginalia':
                                #id_marginalia.append(nn.attrib['id'])

                                c_t_in_marginalia.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            else:
                                #id_paragraph.append(nn.attrib['id'])

                                c_t_in_paragraph.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        if "type" in nn.attrib and nn.attrib['type']=='drop-capital':

                            c_t_in_drop.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                        elif "type" in nn.attrib and nn.attrib['type']=='heading':
                            #id_heading.append(nn.attrib['id'])
                            c_t_in_heading.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1


                        elif "type" in nn.attrib and nn.attrib['type']=='signature-mark':

                            c_t_in_signature_mark.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1
                        elif "type" in nn.attrib and nn.attrib['type']=='header':
                            #id_header.append(nn.attrib['id'])
                            c_t_in_header.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1


                        ###elif "type" in nn.attrib and nn.attrib['type']=='catch-word':
                            ###c_t_in_catch.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            ###sumi+=1

                        ###elif "type" in nn.attrib and nn.attrib['type']=='page-number':

                            ###c_t_in_page_number.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            ###sumi+=1

                        elif "type" in nn.attrib and nn.attrib['type']=='marginalia':
                            #id_marginalia.append(nn.attrib['id'])

                            c_t_in_marginalia.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                        else:
                            #id_paragraph.append(nn.attrib['id'])
                            c_t_in_paragraph.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                    elif vv.tag!=link+'Point' and sumi>=1:
                        break

                if len(c_t_in_drop)>0:
                    co_text_drop.append(np.array(c_t_in_drop))
                if len(c_t_in_paragraph)>0:
                    co_text_paragraph.append(np.array(c_t_in_paragraph))
                    id_paragraph.append(nn.attrib['id'])
                if len(c_t_in_heading)>0:
                    co_text_heading.append(np.array(c_t_in_heading))
                    id_heading.append(nn.attrib['id'])

                if len(c_t_in_header)>0:
                    co_text_header.append(np.array(c_t_in_header))
                    id_header.append(nn.attrib['id'])
                if len(c_t_in_page_number)>0:
                    co_text_page_number.append(np.array(c_t_in_page_number))
                if len(c_t_in_catch)>0:
                    co_text_catch.append(np.array(c_t_in_catch))

                if len(c_t_in_signature_mark)>0:
                    co_text_signature_mark.append(np.array(c_t_in_signature_mark))

                if len(c_t_in_marginalia)>0:
                    co_text_marginalia.append(np.array(c_t_in_marginalia))
                    id_marginalia.append(nn.attrib['id'])


        elif tag.endswith('}GraphicRegion') or tag.endswith('}graphicregion'):
            for nn in root1.iter(tag):
                c_t_in=[]
                c_t_in_text_annotation=[]
                c_t_in_decoration=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')

                            if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
                                c_t_in_text_annotation.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                            elif "type" in nn.attrib and nn.attrib['type']=='decoration':
                                c_t_in_decoration.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                            else:
                                c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )


                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
                            c_t_in_text_annotation.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                        elif "type" in nn.attrib and nn.attrib['type']=='decoration':
                            c_t_in_decoration.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                        else:
                            c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                if len(c_t_in_text_annotation)>0:
                    co_graphic_text_annotation.append(np.array(c_t_in_text_annotation))
                if len(c_t_in_decoration)>0:
                    co_graphic_decoration.append(np.array(c_t_in_decoration))
                if len(c_t_in)>0:
                    co_graphic.append(np.array(c_t_in))


        elif tag.endswith('}ImageRegion') or tag.endswith('}imageregion'):
            for nn in root1.iter(tag):
                c_t_in=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                        sumi+=1
                    elif vv.tag!=link+'Point' and sumi>=1:
                        break
                co_img.append(np.array(c_t_in))
                co_img_text.append(' ')


        elif tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'):
            for nn in root1.iter(tag):
                c_t_in=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                        sumi+=1
                    elif vv.tag!=link+'Point' and sumi>=1:
                        break
                co_sep.append(np.array(c_t_in))


        elif tag.endswith('}TableRegion') or tag.endswith('}tableregion'):
            for nn in root1.iter(tag):
                c_t_in=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                        sumi+=1

                    elif vv.tag!=link+'Point' and sumi>=1:
                        break
                co_table.append(np.array(c_t_in))
                co_table_text.append(' ')

        elif tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'):
            for nn in root1.iter(tag):
                c_t_in=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                        sumi+=1

                    elif vv.tag!=link+'Point' and sumi>=1:
                        break
                co_noise.append(np.array(c_t_in))
                co_noise_text.append(' ')

    img = np.zeros( (y_len,x_len,3) )
    img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=(1,1,1))

    img_poly=cv2.fillPoly(img, pts =co_text_heading, color=(2,2,2))
    img_poly=cv2.fillPoly(img, pts =co_text_header, color=(2,2,2))
    img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=(3,3,3))
    img_poly=cv2.fillPoly(img, pts =co_img, color=(4,4,4))
    img_poly=cv2.fillPoly(img, pts =co_sep, color=(5,5,5))

    return tree1, root1, bb_coord_printspace, file_name, id_paragraph, id_header+id_heading, co_text_paragraph, co_text_header+co_text_heading,\
tot_region_ref,x_len, y_len,index_tot_regions, img_poly


def bounding_box(cnt,color, corr_order_index ):
    x, y, w, h = cv2.boundingRect(cnt)
    x = int(x*scale_w)
    y = int(y*scale_h)

    w = int(w*scale_w)
    h = int(h*scale_h)

    return [x,y,w,h,int(color), int(corr_order_index)+1]

def resize_image(seg_in,input_height,input_width):
    return cv2.resize(seg_in,(input_width,input_height),interpolation=cv2.INTER_NEAREST)

def make_image_from_bb(width_l, height_l, bb_all):
    bb_all =np.array(bb_all)
    img_remade = np.zeros((height_l,width_l ))

    for i in range(bb_all.shape[0]):
        img_remade[bb_all[i,1]:bb_all[i,1]+bb_all[i,3],bb_all[i,0]:bb_all[i,0]+bb_all[i,2] ] = 1
    return img_remade

def update_list_and_return_first_with_length_bigger_than_one(index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp):
    list_inp.pop(index_element_to_be_updated)
    if len(pr_list)>0:
        list_inp.insert(index_element_to_be_updated, pr_list)
    else:
        index_element_to_be_updated = index_element_to_be_updated -1

    list_inp.insert(index_element_to_be_updated+1, [innner_index_pr_pos])
    if len(pos_list)>0:
        list_inp.insert(index_element_to_be_updated+2, pos_list)

    len_all_elements = [len(i) for i in list_inp]
    list_len_bigger_1 = np.where(np.array(len_all_elements)>1)
    list_len_bigger_1 = list_len_bigger_1[0]

    if len(list_len_bigger_1)>0:
        early_list_bigger_than_one = list_len_bigger_1[0]
    else:
        early_list_bigger_than_one = -20
    return list_inp, early_list_bigger_than_one