sbb_pixelwise_segmentation/gt_gen_utils.py

import click
import sys
import os
import numpy as np
import warnings
import xml.etree.ElementTree as ET
from tqdm import tqdm
import cv2
from shapely import geometry
from pathlib import Path


KERNEL = np.ones((5, 5), np.uint8)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

def get_content_of_dir(dir_in):
    """
    Listing all ground truth page xml files. All files are needed to have xml format.
    """

    gt_all=os.listdir(dir_in)
    gt_list=[file for file in gt_all if file.split('.')[ len(file.split('.'))-1 ]=='xml' ]
    return gt_list
    
def return_parent_contours(contours, hierarchy):
    contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1]
    return contours_parent
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
    found_polygons_early = list()

    jv = 0
    for c in contours:
        if len(c) < 3:  # A polygon cannot have less than 3 points
            continue

        polygon = geometry.Polygon([point[0] for point in c])
        # area = cv2.contourArea(c)
        area = polygon.area
        ##print(np.prod(thresh.shape[:2]))
        # Check that polygon has area greater than minimal area
        # print(hierarchy[0][jv][3],hierarchy )
        if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]):  # and hierarchy[0][jv][3]==-1 :
            # print(c[0][0][1])
            found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
        jv += 1
    return found_polygons_early

def filter_contours_area_of_image(image, contours, order_index, max_area, min_area):
    found_polygons_early = list()
    order_index_filtered = list()
    #jv = 0
    for jv, c in enumerate(contours):
        #print(len(c[0]))
        c = c[0]
        if len(c) < 3:  # A polygon cannot have less than 3 points
            continue
        c_e = [point for point in c]
        #print(c_e)
        polygon = geometry.Polygon(c_e)
        area = polygon.area
        #print(area,'area')
        if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]):  # and hierarchy[0][jv][3]==-1 :
            found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint))
            order_index_filtered.append(order_index[jv])
        #jv += 1
    return found_polygons_early, order_index_filtered

def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):

    # pixels of images are identified by 5
    if len(region_pre_p.shape) == 3:
        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
    else:
        cnts_images = (region_pre_p[:, :] == pixel) * 1
    cnts_images = cnts_images.astype(np.uint8)
    cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

    contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contours_imgs = return_parent_contours(contours_imgs, hierarchy)
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=min_area)

    return contours_imgs
def update_region_contours(co_text, img_boundary, erosion_rate, dilation_rate, y_len, x_len):
    co_text_eroded = []
    for con in co_text:
        #try:
        img_boundary_in = np.zeros( (y_len,x_len) )
        img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1))
        #print('bidiahhhhaaa')
        
        
        #img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=7)#asiatica
        if erosion_rate > 0:
            img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=erosion_rate)
        
        pixel = 1
        min_size = 0
        con_eroded = return_contours_of_interested_region(img_boundary_in,pixel, min_size )
        
        try:
            co_text_eroded.append(con_eroded[0])
        except:
            co_text_eroded.append(con)
        

        img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=dilation_rate)
        #img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=5)
        
        boundary = img_boundary_in_dilated[:,:] - img_boundary_in[:,:]
        
        img_boundary[:,:][boundary[:,:]==1] =1
    return co_text_eroded, img_boundary
def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params):
    """
    Reading the page xml files and write the ground truth images into given output directory.
    """
    ## to do: add footnote to text regions
    for index in tqdm(range(len(gt_list))):
        #try:
        tree1 = ET.parse(dir_in+'/'+gt_list[index])
        root1=tree1.getroot()
        alltags=[elem.tag for elem in root1.iter()]
        link=alltags[0].split('}')[0]+'}'
                            
        
        for jj in root1.iter(link+'Page'):
            y_len=int(jj.attrib['imageHeight'])
            x_len=int(jj.attrib['imageWidth'])
            
        if config_file and (config_params['use_case']=='textline' or config_params['use_case']=='word' or config_params['use_case']=='glyph' or config_params['use_case']=='printspace'):
            keys = list(config_params.keys())
            if "artificial_class_label" in keys:
                artificial_class_rgb_color = (255,255,0)
                artificial_class_label = config_params['artificial_class_label']
                
            textline_rgb_color = (255, 0, 0)
                
            if config_params['use_case']=='textline':
                region_tags = np.unique([x for x in alltags if x.endswith('TextLine')])
            elif config_params['use_case']=='word':
                region_tags = np.unique([x for x in alltags if x.endswith('Word')])
            elif config_params['use_case']=='glyph':
                region_tags = np.unique([x for x in alltags if x.endswith('Glyph')])
            elif config_params['use_case']=='printspace':
                region_tags = np.unique([x for x in alltags if x.endswith('PrintSpace')])
                
            co_use_case = []

            for tag in region_tags:
                if config_params['use_case']=='textline':
                    tag_endings = ['}TextLine','}textline']
                elif config_params['use_case']=='word':
                    tag_endings = ['}Word','}word']
                elif config_params['use_case']=='glyph':
                    tag_endings = ['}Glyph','}glyph']
                elif config_params['use_case']=='printspace':
                    tag_endings = ['}PrintSpace','}printspace']
                    
                if tag.endswith(tag_endings[0]) or tag.endswith(tag_endings[1]):
                    for nn in root1.iter(tag):
                        c_t_in = []
                        sumi = 0
                        for vv in nn.iter():
                            # check the format of coords
                            if vv.tag == link + 'Coords':
                                coords = bool(vv.attrib)
                                if coords:
                                    p_h = vv.attrib['points'].split(' ')
                                    c_t_in.append(
                                        np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h]))
                                    break
                                else:
                                    pass

                            if vv.tag == link + 'Point':
                                c_t_in.append([int(float(vv.attrib['x'])), int(float(vv.attrib['y']))])
                                sumi += 1
                            elif vv.tag != link + 'Point' and sumi >= 1:
                                break
                        co_use_case.append(np.array(c_t_in))
                        
                        
            if "artificial_class_label" in keys:
                img_boundary = np.zeros((y_len, x_len))
                erosion_rate = 1
                dilation_rate = 3
                co_use_case, img_boundary = update_region_contours(co_use_case, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
            
                
            img = np.zeros((y_len, x_len, 3))
            if output_type == '2d':
                img_poly = cv2.fillPoly(img, pts=co_use_case, color=(1, 1, 1))
                if "artificial_class_label" in keys:
                    img_poly[:,:][img_boundary[:,:]==1] = artificial_class_label
            elif output_type == '3d':
                img_poly = cv2.fillPoly(img, pts=co_use_case, color=textline_rgb_color)
                if "artificial_class_label" in keys:
                    img_poly[:,:,0][img_boundary[:,:]==1] = artificial_class_rgb_color[0]
                    img_poly[:,:,1][img_boundary[:,:]==1] = artificial_class_rgb_color[1]
                    img_poly[:,:,2][img_boundary[:,:]==1] = artificial_class_rgb_color[2]

            try:
                cv2.imwrite(output_dir + '/' + gt_list[index].split('-')[1].split('.')[0] + '.png',
                            img_poly)
            except:
                cv2.imwrite(output_dir + '/' + gt_list[index].split('.')[0] + '.png', img_poly)

            
        if config_file and config_params['use_case']=='layout':
            keys = list(config_params.keys())
            if "artificial_class_on_boundry" in keys:
                elements_with_artificial_class = list(config_params['artificial_class_on_boundry'])
                artificial_class_rgb_color = (255,255,0)
                artificial_class_label = config_params['artificial_class_label']
            #values = config_params.values()

            if 'textregions' in keys:
                types_text_dict = config_params['textregions']
                types_text = list(types_text_dict.keys())
                types_text_label = list(types_text_dict.values())
            if 'graphicregions' in keys:
                types_graphic_dict = config_params['graphicregions']
                types_graphic = list(types_graphic_dict.keys())
                types_graphic_label = list(types_graphic_dict.values())

                
            labels_rgb_color = [ (0,0,0), (255,0,0), (255,125,0), (255,0,125), (125,255,125), (125,125,0), (0,125,255), (0,125,0), (125,125,125), (255,0,255), (125,0,125), (0,255,0),(0,0,255), (0,255,255), (255,125,125),  (0,125,125), (0,255,125), (255,125,255), (125,255,0)]
            
            
            region_tags=np.unique([x for x in alltags if x.endswith('Region')])   
            co_text = {'drop-capital':[], "footnote":[], "footnote-continued":[], "heading":[], "signature-mark":[], "header":[], "catch-word":[], "page-number":[], "marginalia":[], "paragraph":[]}
            co_graphic = {"handwritten-annotation":[], "decoration":[], "stamp":[], "signature":[]}
            co_sep=[]
            co_img=[]
            co_table=[]
            co_noise=[]
            
            for tag in region_tags:
                if 'textregions' in keys:
                    if tag.endswith('}TextRegion') or tag.endswith('}Textregion'):
                        for nn in root1.iter(tag):
                            c_t_in = {'drop-capital':[], "footnote":[], "footnote-continued":[], "heading":[], "signature-mark":[], "header":[], "catch-word":[], "page-number":[], "marginalia":[], "paragraph":[]}
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        
                                        if "rest_as_paragraph" in types_text:
                                            types_text_without_paragraph = [element for element in types_text if element!='rest_as_paragraph' and element!='paragraph']
                                            if len(types_text_without_paragraph) == 0:
                                                if "type" in nn.attrib:
                                                    c_t_in['paragraph'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                            elif len(types_text_without_paragraph) >= 1:
                                                if "type" in nn.attrib:
                                                    if nn.attrib['type'] in types_text_without_paragraph:
                                                        c_t_in[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                                    else:
                                                        c_t_in['paragraph'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                                        
                                        else:
                                            if "type" in nn.attrib:
                                                c_t_in[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                
                                        break
                                    else:
                                        pass
                
                                
                                if vv.tag==link+'Point':
                                    if "rest_as_paragraph" in types_text:
                                        types_text_without_paragraph = [element for element in types_text if element!='rest_as_paragraph' and element!='paragraph']
                                        if len(types_text_without_paragraph) == 0:
                                            if "type" in nn.attrib:
                                                c_t_in['paragraph'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                sumi+=1
                                        elif len(types_text_without_paragraph) >= 1:
                                            if "type" in nn.attrib:
                                                if nn.attrib['type'] in types_text_without_paragraph:
                                                    c_t_in[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                    sumi+=1
                                                else:
                                                    c_t_in['paragraph'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                    sumi+=1
                                                    
                                    else:
                                        if "type" in nn.attrib:
                                            c_t_in[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                            sumi+=1


                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                
                            for element_text in list(c_t_in.keys()):
                                if len(c_t_in[element_text])>0:
                                    co_text[element_text].append(np.array(c_t_in[element_text]))
                                    
                if 'graphicregions' in keys:
                    if tag.endswith('}GraphicRegion') or tag.endswith('}graphicregion'):
                        #print('sth')
                        for nn in root1.iter(tag):
                            c_t_in_graphic = {"handwritten-annotation":[], "decoration":[], "stamp":[], "signature":[]}
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        
                                        if "rest_as_decoration" in types_graphic:
                                            types_graphic_without_decoration = [element for element in types_graphic if element!='rest_as_decoration' and element!='decoration']
                                            if len(types_graphic_without_decoration) == 0:
                                                if "type" in nn.attrib:
                                                    c_t_in_graphic['decoration'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                            elif len(types_graphic_without_decoration) >= 1:
                                                if "type" in nn.attrib:
                                                    if nn.attrib['type'] in types_graphic_without_decoration:
                                                        c_t_in_graphic[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                                    else:
                                                        c_t_in_graphic['decoration'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                                        
                                        else:
                                            if "type" in nn.attrib:
                                                c_t_in_graphic[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )        
                                        
                                        break
                                    else:
                                        pass
                
                
                                if vv.tag==link+'Point':
                                    if "rest_as_decoration" in types_graphic:
                                        types_graphic_without_decoration = [element for element in types_graphic if element!='rest_as_decoration' and element!='decoration']
                                        if len(types_graphic_without_decoration) == 0:
                                            if "type" in nn.attrib:
                                                c_t_in_graphic['decoration'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                sumi+=1
                                        elif len(types_graphic_without_decoration) >= 1:
                                            if "type" in nn.attrib:
                                                if nn.attrib['type'] in types_graphic_without_decoration:
                                                    c_t_in_graphic[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                    sumi+=1
                                                else:
                                                    c_t_in_graphic['decoration'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
                                                    sumi+=1
                                                    
                                    else:
                                        if "type" in nn.attrib:
                                            c_t_in_graphic[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] ) 
                                            sumi+=1
                                            
                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                                
                            for element_graphic in list(c_t_in_graphic.keys()):
                                if len(c_t_in_graphic[element_graphic])>0:
                                    co_graphic[element_graphic].append(np.array(c_t_in_graphic[element_graphic]))
                                    
            
                if 'imageregion' in keys:
                    if tag.endswith('}ImageRegion') or tag.endswith('}imageregion'):
                        for nn in root1.iter(tag):
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                        break
                                    else:
                                        pass
                
                
                                if vv.tag==link+'Point':
                                    c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                                    sumi+=1

                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                            co_img.append(np.array(c_t_in))
            
                
                if 'separatorregion' in keys:
                    if tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'):
                        for nn in root1.iter(tag):
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                        break
                                    else:
                                        pass
                
                
                                if vv.tag==link+'Point':
                                    c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                                    sumi+=1

                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                            co_sep.append(np.array(c_t_in))
            
            
                if 'tableregion' in keys:
                    if tag.endswith('}TableRegion') or tag.endswith('}tableregion'):
                        #print('sth')
                        for nn in root1.iter(tag):
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                        break
                                    else:
                                        pass
                
                
                                if vv.tag==link+'Point':
                                    c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                                    sumi+=1
                                #print(vv.tag,'in')
                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                            co_table.append(np.array(c_t_in))
            
                if 'noiseregion' in keys:
                    if tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'):
                        #print('sth')
                        for nn in root1.iter(tag):
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
                                # check the format of coords
                                if vv.tag==link+'Coords':
                                    coords=bool(vv.attrib)
                                    if coords:
                                        p_h=vv.attrib['points'].split(' ')
                                        c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                        break
                                    else:
                                        pass
                
                
                                if vv.tag==link+'Point':
                                    c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                                    sumi+=1
                                #print(vv.tag,'in')
                                elif vv.tag!=link+'Point' and sumi>=1:
                                    break
                            co_noise.append(np.array(c_t_in))
            
            if "artificial_class_on_boundry" in keys:
                img_boundary = np.zeros( (y_len,x_len) )
                if "paragraph" in elements_with_artificial_class:
                    erosion_rate = 2
                    dilation_rate = 4
                    co_text['paragraph'], img_boundary = update_region_contours(co_text['paragraph'], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "drop-capital" in elements_with_artificial_class:
                    erosion_rate = 0
                    dilation_rate = 4
                    co_text["drop-capital"], img_boundary = update_region_contours(co_text["drop-capital"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "catch-word" in elements_with_artificial_class:
                    erosion_rate = 0
                    dilation_rate = 4
                    co_text["catch-word"], img_boundary = update_region_contours(co_text["catch-word"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "page-number" in elements_with_artificial_class:
                    erosion_rate = 0
                    dilation_rate = 4
                    co_text["page-number"], img_boundary = update_region_contours(co_text["page-number"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "header" in elements_with_artificial_class:
                    erosion_rate = 1
                    dilation_rate = 4
                    co_text["header"], img_boundary = update_region_contours(co_text["header"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "heading" in elements_with_artificial_class:
                    erosion_rate = 1
                    dilation_rate = 4
                    co_text["heading"], img_boundary = update_region_contours(co_text["heading"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "signature-mark" in elements_with_artificial_class:
                    erosion_rate = 1
                    dilation_rate = 4
                    co_text["signature-mark"], img_boundary = update_region_contours(co_text["signature-mark"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "marginalia" in elements_with_artificial_class:
                    erosion_rate = 2
                    dilation_rate = 4
                    co_text["marginalia"], img_boundary = update_region_contours(co_text["marginalia"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "footnote" in elements_with_artificial_class:
                    erosion_rate = 2
                    dilation_rate = 4
                    co_text["footnote"], img_boundary = update_region_contours(co_text["footnote"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                if "footnote-continued" in elements_with_artificial_class:
                    erosion_rate = 2
                    dilation_rate = 4
                    co_text["footnote-continued"], img_boundary = update_region_contours(co_text["footnote-continued"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
                    
                    
            img = np.zeros( (y_len,x_len,3) ) 

            if output_type == '3d':
                if 'graphicregions' in keys:
                    if 'rest_as_decoration' in types_graphic:
                        types_graphic[types_graphic=='rest_as_decoration'] = 'decoration'
                        for element_graphic in types_graphic:
                            if element_graphic == 'decoration':
                                color_label = labels_rgb_color[ config_params['graphicregions']['rest_as_decoration']]
                            else:
                                color_label = labels_rgb_color[ config_params['graphicregions'][element_graphic]]
                            img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
                    else:
                        for element_graphic in types_graphic:
                            color_label = labels_rgb_color[ config_params['graphicregions'][element_graphic]]
                            img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
                    
                        
                if 'imageregion' in keys: 
                    img_poly=cv2.fillPoly(img, pts =co_img, color=labels_rgb_color[ config_params['imageregion']])
                if 'separatorregion' in keys: 
                    img_poly=cv2.fillPoly(img, pts =co_sep, color=labels_rgb_color[ config_params['separatorregion']])
                if 'tableregion' in keys:  
                    img_poly=cv2.fillPoly(img, pts =co_table, color=labels_rgb_color[ config_params['tableregion']])
                if 'noiseregion' in keys:  
                    img_poly=cv2.fillPoly(img, pts =co_noise, color=labels_rgb_color[ config_params['noiseregion']])
                    
                if 'textregions' in keys:
                    if 'rest_as_paragraph' in types_text:
                        types_text[types_text=='rest_as_paragraph'] = 'paragraph'
                        for element_text in types_text:
                            if element_text == 'paragraph':
                                color_label = labels_rgb_color[ config_params['textregions']['rest_as_paragraph']]
                            else:
                                color_label = labels_rgb_color[ config_params['textregions'][element_text]]
                            img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
                    else:
                        for element_text in types_text:
                            color_label = labels_rgb_color[ config_params['textregions'][element_text]]
                            img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
                        
                        
                if "artificial_class_on_boundry" in keys:
                    img_poly[:,:,0][img_boundary[:,:]==1] = artificial_class_rgb_color[0]
                    img_poly[:,:,1][img_boundary[:,:]==1] = artificial_class_rgb_color[1]
                    img_poly[:,:,2][img_boundary[:,:]==1] = artificial_class_rgb_color[2]
                    

            elif output_type == '2d':
                if 'graphicregions' in keys:
                    if 'rest_as_decoration' in types_graphic:
                        types_graphic[types_graphic=='rest_as_decoration'] = 'decoration'
                        for element_graphic in types_graphic:
                            if element_graphic == 'decoration':
                                color_label = config_params['graphicregions']['rest_as_decoration']
                            else:
                                color_label = config_params['graphicregions'][element_graphic]
                            img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
                    else:
                        for element_graphic in types_graphic:
                            color_label = config_params['graphicregions'][element_graphic]
                            img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
                            
                
                if 'imageregion' in keys:
                    color_label = config_params['imageregion']
                    img_poly=cv2.fillPoly(img, pts =co_img, color=(color_label,color_label,color_label))
                if 'separatorregion' in keys: 
                    color_label = config_params['separatorregion']
                    img_poly=cv2.fillPoly(img, pts =co_sep, color=(color_label,color_label,color_label))
                if 'tableregion' in keys:
                    color_label = config_params['tableregion']
                    img_poly=cv2.fillPoly(img, pts =co_table, color=(color_label,color_label,color_label))
                if 'noiseregion' in keys:
                    color_label = config_params['noiseregion']
                    img_poly=cv2.fillPoly(img, pts =co_noise, color=(color_label,color_label,color_label))
                    
                if 'textregions' in keys:
                    if 'rest_as_paragraph' in types_text:
                        types_text[types_text=='rest_as_paragraph'] = 'paragraph'
                        for element_text in types_text:
                            if element_text == 'paragraph':
                                color_label = config_params['textregions']['rest_as_paragraph']
                            else:
                                color_label = config_params['textregions'][element_text]
                            img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
                    else:
                        for element_text in types_text:
                            color_label = config_params['textregions'][element_text]
                            img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
                        
                if "artificial_class_on_boundry" in keys:
                    img_poly[:,:][img_boundary[:,:]==1] = artificial_class_label
                
                
            try: 
                cv2.imwrite(output_dir+'/'+gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly )
            except:
                cv2.imwrite(output_dir+'/'+gt_list[index].split('.')[0]+'.png',img_poly )
                
                
def find_new_features_of_contours(contours_main):
    
    #print(contours_main[0][0][:, 0])

    areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
    M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
    cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    try:
        x_min_main = np.array([np.min(contours_main[j][0][:, 0]) for j in range(len(contours_main))])

        argmin_x_main = np.array([np.argmin(contours_main[j][0][:, 0]) for j in range(len(contours_main))])

        x_min_from_argmin = np.array([contours_main[j][0][argmin_x_main[j], 0] for j in range(len(contours_main))])
        y_corr_x_min_from_argmin = np.array([contours_main[j][0][argmin_x_main[j], 1] for j in range(len(contours_main))])

        x_max_main = np.array([np.max(contours_main[j][0][:, 0]) for j in range(len(contours_main))])

        y_min_main = np.array([np.min(contours_main[j][0][:, 1]) for j in range(len(contours_main))])
        y_max_main = np.array([np.max(contours_main[j][0][:, 1]) for j in range(len(contours_main))])
    except:
        x_min_main = np.array([np.min(contours_main[j][:, 0]) for j in range(len(contours_main))])

        argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) for j in range(len(contours_main))])

        x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] for j in range(len(contours_main))])
        y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] for j in range(len(contours_main))])

        x_max_main = np.array([np.max(contours_main[j][:, 0]) for j in range(len(contours_main))])

        y_min_main = np.array([np.min(contours_main[j][:, 1]) for j in range(len(contours_main))])
        y_max_main = np.array([np.max(contours_main[j][:, 1]) for j in range(len(contours_main))])

    # dis_x=np.abs(x_max_main-x_min_main)

    return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
def read_xml(xml_file):
    file_name = Path(xml_file).stem
    tree1 = ET.parse(xml_file)
    root1=tree1.getroot()
    alltags=[elem.tag for elem in root1.iter()]
    link=alltags[0].split('}')[0]+'}'

    index_tot_regions = []
    tot_region_ref = []

    for jj in root1.iter(link+'Page'):
        y_len=int(jj.attrib['imageHeight'])
        x_len=int(jj.attrib['imageWidth'])


    for jj in root1.iter(link+'RegionRefIndexed'):
        index_tot_regions.append(jj.attrib['index'])
        tot_region_ref.append(jj.attrib['regionRef'])

    region_tags=np.unique([x for x in alltags if x.endswith('Region')])   
    #print(region_tags)
    co_text_paragraph=[]
    co_text_drop=[]
    co_text_heading=[]
    co_text_header=[]
    co_text_marginalia=[]
    co_text_catch=[]
    co_text_page_number=[]
    co_text_signature_mark=[]
    co_sep=[]
    co_img=[]
    co_table=[]
    co_graphic=[]
    co_graphic_text_annotation=[]
    co_graphic_decoration=[]
    co_noise=[]


    co_text_paragraph_text=[]
    co_text_drop_text=[]
    co_text_heading_text=[]
    co_text_header_text=[]
    co_text_marginalia_text=[]
    co_text_catch_text=[]
    co_text_page_number_text=[]
    co_text_signature_mark_text=[]
    co_sep_text=[]
    co_img_text=[]
    co_table_text=[]
    co_graphic_text=[]
    co_graphic_text_annotation_text=[]
    co_graphic_decoration_text=[]
    co_noise_text=[]


    id_paragraph = []
    id_header = []
    id_heading = []
    id_marginalia = []

    for tag in region_tags:
        if tag.endswith('}TextRegion') or tag.endswith('}Textregion'):
            for nn in root1.iter(tag):
                for child2 in nn:
                    tag2 = child2.tag
                    #print(child2.tag)
                    if tag2.endswith('}TextEquiv') or tag2.endswith('}TextEquiv'):
                        #children2 = childtext.getchildren()
                        #rank = child2.find('Unicode').text
                        for childtext2 in child2:
                            #rank = childtext2.find('Unicode').text
                            #if childtext2.tag.endswith('}PlainText') or childtext2.tag.endswith('}PlainText'):
                            #print(childtext2.text)
                            if childtext2.tag.endswith('}Unicode') or childtext2.tag.endswith('}Unicode'):
                                if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
                                    co_text_drop_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='heading':
                                    co_text_heading_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='signature-mark':
                                    co_text_signature_mark_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='header':
                                    co_text_header_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='catch-word':
                                    co_text_catch_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='page-number':
                                    co_text_page_number_text.append(childtext2.text)
                                elif "type" in nn.attrib and nn.attrib['type']=='marginalia':
                                    co_text_marginalia_text.append(childtext2.text)
                                else:
                                    co_text_paragraph_text.append(childtext2.text)
                c_t_in_drop=[]
                c_t_in_paragraph=[]
                c_t_in_heading=[]
                c_t_in_header=[]
                c_t_in_page_number=[]
                c_t_in_signature_mark=[]
                c_t_in_catch=[]
                c_t_in_marginalia=[]


                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':

                        coords=bool(vv.attrib)
                        if coords:
                            #print('birda1')
                            p_h=vv.attrib['points'].split(' ')


                            if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
                            #if nn.attrib['type']=='paragraph':

                                c_t_in_drop.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                            elif "type" in nn.attrib and nn.attrib['type']=='heading':
                                id_heading.append(nn.attrib['id'])
                                c_t_in_heading.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )


                            elif "type" in nn.attrib and nn.attrib['type']=='signature-mark':

                                c_t_in_signature_mark.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                #print(c_t_in_paragraph)
                            elif "type" in nn.attrib and nn.attrib['type']=='header':
                                id_header.append(nn.attrib['id'])
                                c_t_in_header.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )


                            elif "type" in nn.attrib and nn.attrib['type']=='catch-word':
                                c_t_in_catch.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )


                            elif "type" in nn.attrib and nn.attrib['type']=='page-number':

                                c_t_in_page_number.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                #print(c_t_in_paragraph)

                            elif "type" in nn.attrib and nn.attrib['type']=='marginalia':
                                id_marginalia.append(nn.attrib['id'])

                                c_t_in_marginalia.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                #print(c_t_in_paragraph)
                            else:
                                #print(nn.attrib['id'])

                                id_paragraph.append(nn.attrib['id'])

                                c_t_in_paragraph.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                #print(c_t_in_paragraph)

                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
                        #if nn.attrib['type']=='paragraph':

                            c_t_in_drop.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                        elif "type" in nn.attrib and nn.attrib['type']=='heading':
                            id_heading.append(nn.attrib['id'])
                            c_t_in_heading.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1


                        elif "type" in nn.attrib and nn.attrib['type']=='signature-mark':

                            c_t_in_signature_mark.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            #print(c_t_in_paragraph)
                            sumi+=1
                        elif "type" in nn.attrib and nn.attrib['type']=='header':
                            id_header.append(nn.attrib['id'])
                            c_t_in_header.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1


                        elif "type" in nn.attrib and nn.attrib['type']=='catch-word':
                            c_t_in_catch.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1


                        elif "type" in nn.attrib and nn.attrib['type']=='page-number':

                            c_t_in_page_number.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            #print(c_t_in_paragraph)
                            sumi+=1

                        elif "type" in nn.attrib and nn.attrib['type']=='marginalia':
                            id_marginalia.append(nn.attrib['id'])

                            c_t_in_marginalia.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            #print(c_t_in_paragraph)
                            sumi+=1

                        else:
                            id_paragraph.append(nn.attrib['id'])
                            c_t_in_paragraph.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            #print(c_t_in_paragraph)
                            sumi+=1

                        #c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])

                    #print(vv.tag,'in')
                    elif vv.tag!=link+'Point' and sumi>=1:
                        break

                if len(c_t_in_drop)>0:
                    co_text_drop.append(np.array(c_t_in_drop))
                if len(c_t_in_paragraph)>0:
                    co_text_paragraph.append(np.array(c_t_in_paragraph))
                if len(c_t_in_heading)>0:
                    co_text_heading.append(np.array(c_t_in_heading))

                if len(c_t_in_header)>0:
                    co_text_header.append(np.array(c_t_in_header))
                if len(c_t_in_page_number)>0:
                    co_text_page_number.append(np.array(c_t_in_page_number))
                if len(c_t_in_catch)>0:
                    co_text_catch.append(np.array(c_t_in_catch))

                if len(c_t_in_signature_mark)>0:
                    co_text_signature_mark.append(np.array(c_t_in_signature_mark))

                if len(c_t_in_marginalia)>0:
                    co_text_marginalia.append(np.array(c_t_in_marginalia))


        elif tag.endswith('}GraphicRegion') or tag.endswith('}graphicregion'):
            #print('sth')
            for nn in root1.iter(tag):
                c_t_in=[]
                c_t_in_text_annotation=[]
                c_t_in_decoration=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            #c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                            if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
                            #if nn.attrib['type']=='paragraph':

                                c_t_in_text_annotation.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )

                            elif "type" in nn.attrib and nn.attrib['type']=='decoration':

                                c_t_in_decoration.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                                #print(c_t_in_paragraph)
                            else:
                                c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )


                            break
                        else:
                            pass


                    if vv.tag==link+'Point':

                        if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
                        #if nn.attrib['type']=='paragraph':

                            c_t_in_text_annotation.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                        elif "type" in nn.attrib and nn.attrib['type']=='decoration':

                            c_t_in_decoration.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            #print(c_t_in_paragraph)
                            sumi+=1
                        else:
                            c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                            sumi+=1

                if len(c_t_in_text_annotation)>0:
                    co_graphic_text_annotation.append(np.array(c_t_in_text_annotation))
                if len(c_t_in_decoration)>0:
                    co_graphic_decoration.append(np.array(c_t_in_decoration))
                if len(c_t_in)>0:
                    co_graphic.append(np.array(c_t_in))


        elif tag.endswith('}ImageRegion') or tag.endswith('}imageregion'):
            #print('sth')
            for nn in root1.iter(tag):
                c_t_in=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                        sumi+=1
                    #print(vv.tag,'in')
                    elif vv.tag!=link+'Point' and sumi>=1:
                        break
                co_img.append(np.array(c_t_in))
                co_img_text.append(' ')


        elif tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'):
            #print('sth')
            for nn in root1.iter(tag):
                c_t_in=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                        sumi+=1
                    #print(vv.tag,'in')
                    elif vv.tag!=link+'Point' and sumi>=1:
                        break
                co_sep.append(np.array(c_t_in))


        elif tag.endswith('}TableRegion') or tag.endswith('}tableregion'):
            #print('sth')
            for nn in root1.iter(tag):
                c_t_in=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                        sumi+=1
                    #print(vv.tag,'in')
                    elif vv.tag!=link+'Point' and sumi>=1:
                        break
                co_table.append(np.array(c_t_in))
                co_table_text.append(' ')

        elif tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'):
            #print('sth')
            for nn in root1.iter(tag):
                c_t_in=[]
                sumi=0
                for vv in nn.iter():
                    # check the format of coords
                    if vv.tag==link+'Coords':
                        coords=bool(vv.attrib)
                        if coords:
                            p_h=vv.attrib['points'].split(' ')
                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
                            break
                        else:
                            pass


                    if vv.tag==link+'Point':
                        c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
                        sumi+=1
                    #print(vv.tag,'in')
                    elif vv.tag!=link+'Point' and sumi>=1:
                        break
                co_noise.append(np.array(c_t_in))
                co_noise_text.append(' ')


    img = np.zeros( (y_len,x_len,3) ) 

    img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=(1,1,1))

    img_poly=cv2.fillPoly(img, pts =co_text_heading, color=(2,2,2))
    img_poly=cv2.fillPoly(img, pts =co_text_header, color=(2,2,2))
    #img_poly=cv2.fillPoly(img, pts =co_text_catch, color=(125,255,125))
    #img_poly=cv2.fillPoly(img, pts =co_text_signature_mark, color=(125,125,0))
    #img_poly=cv2.fillPoly(img, pts =co_graphic_decoration, color=(1,125,255))
    #img_poly=cv2.fillPoly(img, pts =co_text_page_number, color=(1,125,0))
    img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=(3,3,3))
    #img_poly=cv2.fillPoly(img, pts =co_text_drop, color=(1,125,255))

    #img_poly=cv2.fillPoly(img, pts =co_graphic_text_annotation, color=(125,0,125))
    img_poly=cv2.fillPoly(img, pts =co_img, color=(4,4,4))
    img_poly=cv2.fillPoly(img, pts =co_sep, color=(5,5,5))
    #img_poly=cv2.fillPoly(img, pts =co_table, color=(1,255,255))
    #img_poly=cv2.fillPoly(img, pts =co_graphic, color=(255,125,125))
    #img_poly=cv2.fillPoly(img, pts =co_noise, color=(255,0,255))

    #print('yazdimmm',self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.jpg')
    ###try: 
        ####print('yazdimmm',self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.jpg')
        ###cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.jpg',img_poly )
    ###except:
        ###cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.jpg',img_poly )
    return file_name, id_paragraph, id_header,co_text_paragraph, co_text_header,\
tot_region_ref,x_len, y_len,index_tot_regions, img_poly


def bounding_box(cnt,color, corr_order_index ):
    x, y, w, h = cv2.boundingRect(cnt)
    x = int(x*scale_w)
    y = int(y*scale_h)
    
    w = int(w*scale_w)
    h = int(h*scale_h)
    
    return [x,y,w,h,int(color), int(corr_order_index)+1]

def resize_image(seg_in,input_height,input_width):
    return cv2.resize(seg_in,(input_width,input_height),interpolation=cv2.INTER_NEAREST)

def make_image_from_bb(width_l, height_l, bb_all):
    bb_all =np.array(bb_all)
    img_remade = np.zeros((height_l,width_l ))
    
    for i in range(bb_all.shape[0]):
        img_remade[bb_all[i,1]:bb_all[i,1]+bb_all[i,3],bb_all[i,0]:bb_all[i,0]+bb_all[i,2] ] = 1
    return img_remade