page2label with a dynamic layout

2026-01-09 11:57:11 +01:00 · 2024-05-22 12:38:24 +02:00 · 2024-05-22 12:38:24 +02:00 · f7dda078d2
commit f7dda078d2
parent 5f06a02441
2 changed files with 479 additions and 17 deletions
--- a/custom_config_page2label.json
+++ b/custom_config_page2label.json
@ -0,0 +1,6 @@
+{
+"textregions":{"paragraph":1, "heading": 2, "header":2,"drop-capital": 3, "marginalia":4 },
+"imageregion":5,
+"separatorregion":6,
+"graphicregions" :{"handwritten-annotation":7, "decoration": 8, "signature": 9, "stamp": 10}
+}
--- a/pagexml2label.py
+++ b/pagexml2label.py
@ -7,6 +7,7 @@ import xml.etree.ElementTree as ET
 from tqdm import tqdm
 import cv2
 from shapely import geometry
+import json

 with warnings.catch_warnings():
    warnings.simplefilter("ignore")
@ -21,11 +22,12 @@ This classes.txt file is required for dhsegment tool.
 KERNEL = np.ones((5, 5), np.uint8)

 class pagexml2word:
-    def __init__(self,dir_in, out_dir,output_type,experiment):
+    def __init__(self,dir_in, out_dir,output_type,experiment,layout_config=None):
+        # layout_config is optional so that callers still passing only the four
+        # original arguments keep working. When it is falsy, the config-driven
+        # branch guarded by `if self.layout_config:` in
+        # get_images_of_ground_truth is not taken.
        self.dir=dir_in
        self.output_dir=out_dir
        self.output_type=output_type
        self.experiment=experiment
+        self.layout_config=layout_config

    def get_content_of_dir(self):
        """
@ -77,7 +79,7 @@ class pagexml2word:

        return contours_imgs

-    def get_images_of_ground_truth(self):
+    def get_images_of_ground_truth(self, config_params):
        """
        Reading the page xml files and write the ground truth images into given output directory.
        """
@ -93,6 +95,445 @@ class pagexml2word:
            for jj in root1.iter(link+'Page'):
                y_len=int(jj.attrib['imageHeight'])
                x_len=int(jj.attrib['imageWidth'])
+                
+            if self.layout_config:
+                keys = list(config_params.keys())
+                #values = config_params.values()
+
+                if 'textregions' in keys:
+                    types_text_dict = config_params['textregions']
+                    types_text = list(types_text_dict.keys())
+                    types_text_label = list(types_text_dict.values())
+                if 'graphicregions' in keys:
+                    types_graphic_dict = config_params['graphicregions']
+                    types_graphic = list(types_graphic_dict.keys())
+                    types_graphic_label = list(types_graphic_dict.values())
+
+                    
+                types_text_label_rgb = [ (0,0,0), (255,0,0), (255,125,0), (255,0,125), (125,255,125), (125,125,0), (0,125,255), (0,125,0), (125,125,125), (0,125,255), (125,0,125), (0,255,0),(0,0,255), (0,255,255), (255,125,125),  (0,125,255), (0,255,125)]
+                
+                region_tags=np.unique([x for x in alltags if x.endswith('Region')])   
+
+                co_text_paragraph=[]
+                co_text_drop=[]
+                co_text_heading=[]
+                co_text_header=[]
+                co_text_marginalia=[]
+                co_text_catch=[]
+                co_text_page_number=[]
+                co_text_signature_mark=[]
+                co_sep=[]
+                co_img=[]
+                co_table=[]
+                co_graphic_signature=[]
+                co_graphic_text_annotation=[]
+                co_graphic_decoration=[]
+                co_graphic_stamp=[]
+                co_noise=[]
+                
+                for tag in region_tags:
+                    if 'textregions' in keys:
+                        if tag.endswith('}TextRegion') or tag.endswith('}Textregion'):
+                            for nn in root1.iter(tag):
+                                c_t_in_drop=[]
+                                c_t_in_paragraph=[]
+                                c_t_in_heading=[]
+                                c_t_in_header=[]
+                                c_t_in_page_number=[]
+                                c_t_in_signature_mark=[]
+                                c_t_in_catch=[]
+                                c_t_in_marginalia=[]
+                                sumi=0
+                                for vv in nn.iter():
+                                    # check the format of coords
+                                    if vv.tag==link+'Coords':
+                    
+                                        coords=bool(vv.attrib)
+                                        if coords:
+                                            #print('birda1')
+                                            p_h=vv.attrib['points'].split(' ')
+                                            
+                                            if "drop-capital" in types_text:
+                                                if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
+                                                    c_t_in_drop.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                            
+                                            if "heading" in types_text:
+                                                if "type" in nn.attrib and nn.attrib['type']=='heading':
+                                                    c_t_in_heading.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                        
+                                            if "signature-mark" in types_text:
+                                                if "type" in nn.attrib and nn.attrib['type']=='signature-mark':
+                                                    c_t_in_signature_mark.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+
+                                            if "header" in types_text:
+                                                if "type" in nn.attrib and nn.attrib['type']=='header':
+                                                    c_t_in_header.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                            
+                                            if "catch-word" in types_text:
+                                                if "type" in nn.attrib and nn.attrib['type']=='catch-word':
+                                                    c_t_in_catch.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                        
+                                            if "page-number" in types_text:
+                                                if "type" in nn.attrib and nn.attrib['type']=='page-number':
+                                                    c_t_in_page_number.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+
+                                            if "marginalia" in types_text:    
+                                                if "type" in nn.attrib and nn.attrib['type']=='marginalia':
+                                                    c_t_in_marginalia.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                                
+                                            if "paragraph" in types_text:
+                                                if "type" in nn.attrib and nn.attrib['type']=='paragraph':
+                                                    c_t_in_paragraph.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+
+                    
+                                            break
+                                        else:
+                                            pass
+                    
+                    
+                                    if vv.tag==link+'Point':
+                                        if "drop-capital" in types_text:
+                                            if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
+                                                c_t_in_drop.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                                
+                                        if "heading" in types_text:
+                                            if "type" in nn.attrib and nn.attrib['type']=='heading':
+                                                c_t_in_heading.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                                
+                                        if "signature-mark" in types_text:
+                                            if "type" in nn.attrib and nn.attrib['type']=='signature-mark':
+                                                c_t_in_signature_mark.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                         
+                                        if "header" in types_text:
+                                            if "type" in nn.attrib and nn.attrib['type']=='header':
+                                                c_t_in_header.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                        
+                                        if "catch-word" in types_text:
+                                            if "type" in nn.attrib and nn.attrib['type']=='catch-word':
+                                                c_t_in_catch.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                                
+                                        if "page-number" in types_text:
+                                            if "type" in nn.attrib and nn.attrib['type']=='page-number':
+                                                c_t_in_page_number.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                        
+                                        if "marginalia" in types_text:
+                                            if "type" in nn.attrib and nn.attrib['type']=='marginalia':
+                                                c_t_in_marginalia.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                            
+                                        if "paragraph" in types_text:
+                                            if "type" in nn.attrib and nn.attrib['type']=='paragraph':
+                                                c_t_in_paragraph.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                            
+
+                                    elif vv.tag!=link+'Point' and sumi>=1:
+                                        break
+                    
+                                if len(c_t_in_drop)>0:
+                                    co_text_drop.append(np.array(c_t_in_drop))
+                                if len(c_t_in_paragraph)>0:
+                                    co_text_paragraph.append(np.array(c_t_in_paragraph))
+                                if len(c_t_in_heading)>0:
+                                    co_text_heading.append(np.array(c_t_in_heading))
+                                    
+                                if len(c_t_in_header)>0:
+                                    co_text_header.append(np.array(c_t_in_header))
+                                if len(c_t_in_page_number)>0:
+                                    co_text_page_number.append(np.array(c_t_in_page_number))
+                                if len(c_t_in_catch)>0:
+                                    co_text_catch.append(np.array(c_t_in_catch))
+                                    
+                                if len(c_t_in_signature_mark)>0:
+                                    co_text_signature_mark.append(np.array(c_t_in_signature_mark))
+                                    
+                                if len(c_t_in_marginalia)>0:
+                                    co_text_marginalia.append(np.array(c_t_in_marginalia))
+                                    
+                    
+                    if 'graphicregions' in keys:
+                        if tag.endswith('}GraphicRegion') or tag.endswith('}graphicregion'):
+                            #print('sth')
+                            for nn in root1.iter(tag):
+                                c_t_in_stamp=[]
+                                c_t_in_text_annotation=[]
+                                c_t_in_decoration=[]
+                                c_t_in_signature=[]
+                                sumi=0
+                                for vv in nn.iter():
+                                    # check the format of coords
+                                    if vv.tag==link+'Coords':
+                                        coords=bool(vv.attrib)
+                                        if coords:
+                                            p_h=vv.attrib['points'].split(' ')
+                                            if "handwritten-annotation" in types_graphic:
+                                                if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
+                                                    c_t_in_text_annotation.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                            
+                                            if "decoration" in types_graphic:
+                                                if "type" in nn.attrib and nn.attrib['type']=='decoration':
+                                                    c_t_in_decoration.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+
+                                            if "stamp" in types_graphic:
+                                                if "type" in nn.attrib and nn.attrib['type']=='stamp':
+                                                    c_t_in_stamp.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                                
+                                            if "signature" in types_graphic:
+                                                if "type" in nn.attrib and nn.attrib['type']=='signature':
+                                                    c_t_in_signature.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                        
+                                            
+                                            
+                                            break
+                                        else:
+                                            pass
+                    
+                    
+                                    if vv.tag==link+'Point':
+                                        if "handwritten-annotation" in types_graphic:
+                                            if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
+                                                c_t_in_text_annotation.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                                
+                                        if "decoration" in types_graphic:        
+                                            if "type" in nn.attrib and nn.attrib['type']=='decoration':
+                                                c_t_in_decoration.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                            
+                                        if "stamp" in types_graphic:
+                                            if "type" in nn.attrib and nn.attrib['type']=='stamp':
+                                                c_t_in_stamp.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                            
+                                        if "signature" in types_graphic:
+                                            if "type" in nn.attrib and nn.attrib['type']=='signature':
+                                                c_t_in_signature.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                                sumi+=1
+                                        
+                                if len(c_t_in_text_annotation)>0:
+                                    co_graphic_text_annotation.append(np.array(c_t_in_text_annotation))
+                                if len(c_t_in_decoration)>0:
+                                    co_graphic_decoration.append(np.array(c_t_in_decoration))
+                                if len(c_t_in_stamp)>0:
+                                    co_graphic_stamp.append(np.array(c_t_in_stamp))
+                                if len(c_t_in_signature)>0:
+                                    co_graphic_signature.append(np.array(c_t_in_signature))
+                
+                    if 'imageregion' in keys:
+                        if tag.endswith('}ImageRegion') or tag.endswith('}imageregion'):
+                            for nn in root1.iter(tag):
+                                c_t_in=[]
+                                sumi=0
+                                for vv in nn.iter():
+                                    if vv.tag==link+'Coords':
+                                        coords=bool(vv.attrib)
+                                        if coords:
+                                            p_h=vv.attrib['points'].split(' ')
+                                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                            break
+                                        else:
+                                            pass
+                    
+                    
+                                    if vv.tag==link+'Point':
+                                        c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                        sumi+=1
+
+                                    elif vv.tag!=link+'Point' and sumi>=1:
+                                        break
+                                co_img.append(np.array(c_t_in))
+                
+                    
+                    if 'separatorregion' in keys:
+                        if tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'):
+                            for nn in root1.iter(tag):
+                                c_t_in=[]
+                                sumi=0
+                                for vv in nn.iter():
+                                    # check the format of coords
+                                    if vv.tag==link+'Coords':
+                                        coords=bool(vv.attrib)
+                                        if coords:
+                                            p_h=vv.attrib['points'].split(' ')
+                                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                            break
+                                        else:
+                                            pass
+                    
+                    
+                                    if vv.tag==link+'Point':
+                                        c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                        sumi+=1
+
+                                    elif vv.tag!=link+'Point' and sumi>=1:
+                                        break
+                                co_sep.append(np.array(c_t_in))
+                
+                
+                
+                    if 'tableregion' in keys:
+                        if tag.endswith('}TableRegion') or tag.endswith('}tableregion'):
+                            #print('sth')
+                            for nn in root1.iter(tag):
+                                c_t_in=[]
+                                sumi=0
+                                for vv in nn.iter():
+                                    # check the format of coords
+                                    if vv.tag==link+'Coords':
+                                        coords=bool(vv.attrib)
+                                        if coords:
+                                            p_h=vv.attrib['points'].split(' ')
+                                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                            break
+                                        else:
+                                            pass
+                    
+                    
+                                    if vv.tag==link+'Point':
+                                        c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                        sumi+=1
+                                    #print(vv.tag,'in')
+                                    elif vv.tag!=link+'Point' and sumi>=1:
+                                        break
+                                co_table.append(np.array(c_t_in))
+                
+                    if 'noiseregion' in keys:
+                        if tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'):
+                            #print('sth')
+                            for nn in root1.iter(tag):
+                                c_t_in=[]
+                                sumi=0
+                                for vv in nn.iter():
+                                    # check the format of coords
+                                    if vv.tag==link+'Coords':
+                                        coords=bool(vv.attrib)
+                                        if coords:
+                                            p_h=vv.attrib['points'].split(' ')
+                                            c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] ) )
+                                            break
+                                        else:
+                                            pass
+                    
+                    
+                                    if vv.tag==link+'Point':
+                                        c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
+                                        sumi+=1
+                                    #print(vv.tag,'in')
+                                    elif vv.tag!=link+'Point' and sumi>=1:
+                                        break
+                                co_noise.append(np.array(c_t_in))
+                
+                img = np.zeros( (y_len,x_len,3) ) 
+
+                if self.output_type == '3d':
+                    
+                    if 'graphicregions' in keys:
+                        if "handwritten-annotation" in types_graphic:
+                            img_poly=cv2.fillPoly(img, pts =co_graphic_text_annotation, color=types_text_label_rgb[ config_params['graphicregions']['handwritten-annotation']])
+                        if "signature" in types_graphic:
+                            img_poly=cv2.fillPoly(img, pts =co_graphic_signature, color=types_text_label_rgb[ config_params['graphicregions']['signature']])
+                        if "decoration" in types_graphic:
+                            img_poly=cv2.fillPoly(img, pts =co_graphic_decoration, color=types_text_label_rgb[ config_params['graphicregions']['decoration']])
+                        if "stamp" in types_graphic:
+                            img_poly=cv2.fillPoly(img, pts =co_graphic_stamp, color=types_text_label_rgb[ config_params['graphicregions']['stamp']])
+                            
+                    if 'imageregion' in keys: 
+                        img_poly=cv2.fillPoly(img, pts =co_img, color=types_text_label_rgb[ config_params['imageregion']])
+                    if 'separatorregion' in keys: 
+                        img_poly=cv2.fillPoly(img, pts =co_sep, color=types_text_label_rgb[ config_params['separatorregion']])
+                    if 'tableregion' in keys:  
+                        img_poly=cv2.fillPoly(img, pts =co_table, color=types_text_label_rgb[ config_params['tableregion']])
+                    if 'noiseregion' in keys:  
+                        img_poly=cv2.fillPoly(img, pts =co_noise, color=types_text_label_rgb[ config_params['noiseregion']])
+                        
+                    if 'textregions' in keys:
+                        if "paragraph" in types_text:
+                            img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=types_text_label_rgb[ config_params['textregions']['paragraph']])
+                        if "heading" in types_text:
+                            img_poly=cv2.fillPoly(img, pts =co_text_heading, color=types_text_label_rgb[ config_params['textregions']['heading']])
+                        if "header" in types_text:
+                            img_poly=cv2.fillPoly(img, pts =co_text_header, color=types_text_label_rgb[ config_params['textregions']['header']])
+                        if "catch-word" in types_text:
+                            img_poly=cv2.fillPoly(img, pts =co_text_catch, color=types_text_label_rgb[ config_params['textregions']['catch-word']])
+                        if "signature-mark" in types_text:
+                            img_poly=cv2.fillPoly(img, pts =co_text_signature_mark, color=types_text_label_rgb[ config_params['textregions']['signature-mark']])
+                        if "page-number" in types_text:
+                            img_poly=cv2.fillPoly(img, pts =co_text_page_number, color=types_text_label_rgb[ config_params['textregions']['page-number']])
+                        if "marginalia" in types_text:
+                            img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=types_text_label_rgb[ config_params['textregions']['marginalia']])
+                        if "drop-capital" in types_text:
+                            img_poly=cv2.fillPoly(img, pts =co_text_drop, color=types_text_label_rgb[ config_params['textregions']['drop-capital']])
+                    
+                elif self.output_type == '2d':
+                    if 'graphicregions' in keys:
+                        if "handwritten-annotation" in types_graphic:
+                            color_label = config_params['graphicregions']['handwritten-annotation']
+                            img_poly=cv2.fillPoly(img, pts =co_graphic_text_annotation, color=(color_label,color_label,color_label))
+                        if "signature" in types_graphic:
+                            color_label = config_params['graphicregions']['signature']
+                            img_poly=cv2.fillPoly(img, pts =co_graphic_signature, color=(color_label,color_label,color_label))
+                        if "decoration" in types_graphic:
+                            color_label = config_params['graphicregions']['decoration']
+                            img_poly=cv2.fillPoly(img, pts =co_graphic_decoration, color=(color_label,color_label,color_label))
+                        if "stamp" in types_graphic:
+                            color_label = config_params['graphicregions']['stamp']
+                            img_poly=cv2.fillPoly(img, pts =co_graphic_stamp, color=(color_label,color_label,color_label))
+                    
+                    if 'imageregion' in keys:
+                        color_label = config_params['imageregion']
+                        img_poly=cv2.fillPoly(img, pts =co_img, color=(color_label,color_label,color_label))
+                    if 'separatorregion' in keys: 
+                        color_label = config_params['separatorregion']
+                        img_poly=cv2.fillPoly(img, pts =co_sep, color=(color_label,color_label,color_label))
+                    if 'tableregion' in keys:
+                        color_label = config_params['tableregion']
+                        img_poly=cv2.fillPoly(img, pts =co_table, color=(color_label,color_label,color_label))
+                    if 'noiseregion' in keys:
+                        color_label = config_params['noiseregion']
+                        img_poly=cv2.fillPoly(img, pts =co_noise, color=(color_label,color_label,color_label))
+                        
+                    if 'textregions' in keys:
+                        if "paragraph" in types_text:
+                            color_label = config_params['textregions']['paragraph']
+                            img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=(color_label,color_label,color_label))
+                        if "heading" in types_text:
+                            color_label = config_params['textregions']['heading']
+                            img_poly=cv2.fillPoly(img, pts =co_text_heading, color=(color_label,color_label,color_label))
+                        if "header" in types_text:
+                            color_label = config_params['textregions']['header']
+                            img_poly=cv2.fillPoly(img, pts =co_text_header, color=(color_label,color_label,color_label))
+                        if "catch-word" in types_text:
+                            color_label = config_params['textregions']['catch-word']
+                            img_poly=cv2.fillPoly(img, pts =co_text_catch, color=(color_label,color_label,color_label))
+                        if "signature-mark" in types_text:
+                            color_label = config_params['textregions']['signature-mark']
+                            img_poly=cv2.fillPoly(img, pts =co_text_signature_mark, color=(color_label,color_label,color_label))
+                        if "page-number" in types_text:
+                            color_label = config_params['textregions']['page-number']
+                            img_poly=cv2.fillPoly(img, pts =co_text_page_number, color=(color_label,color_label,color_label))
+                        if "marginalia" in types_text:
+                            color_label = config_params['textregions']['marginalia']
+                            img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=(color_label,color_label,color_label))
+                        if "drop-capital" in types_text:
+                            color_label = config_params['textregions']['drop-capital']
+                            img_poly=cv2.fillPoly(img, pts =co_text_drop, color=(color_label,color_label,color_label))
+                    
+                    
+                    
+                    
+                try: 
+                    cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly )
+                except:
+                    cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly )
+                    
+
+                #print(values[0])
            if self.experiment=='word':
                region_tags=np.unique([x for x in alltags if x.endswith('Word')])                  
                co_word=[]
@ -302,6 +743,7 @@ class pagexml2word:
                    if tag.endswith('}TextRegion') or tag.endswith('}Textregion'):
                        #print('sth')
                        for nn in root1.iter(tag):
+                            print(nn.attrib['type'])
                            c_t_in=[]
                            sumi=0
                            for vv in nn.iter():
@ -374,19 +816,18 @@ class pagexml2word:
                                    break
                            co_sep.append(np.array(c_t_in))

+                img_poly = np.zeros( (y_len,x_len,3) ) 
                    
                
-                img = np.zeros( (y_len,x_len,3) ) 
-                
                if self.output_type == '3d':
-                    img_poly=cv2.fillPoly(img, pts =co_text, color=(255,0,0))
-                    img_poly=cv2.fillPoly(img, pts =co_img, color=(0,255,0))
-                    img_poly=cv2.fillPoly(img, pts =co_sep, color=(0,0,255))
+                    img_poly=cv2.fillPoly(img_poly, pts =co_text, color=(255,0,0))
+                    img_poly=cv2.fillPoly(img_poly, pts =co_img, color=(0,255,0))
+                    img_poly=cv2.fillPoly(img_poly, pts =co_sep, color=(0,0,255))
                    ##img_poly=cv2.fillPoly(img, pts =co_graphic, color=(255,125,125))
                elif self.output_type == '2d':
-                    img_poly=cv2.fillPoly(img, pts =co_text, color=(1,1,1))
-                    img_poly=cv2.fillPoly(img, pts =co_img, color=(2,2,2))
-                    img_poly=cv2.fillPoly(img, pts =co_sep, color=(3,3,3))
+                    img_poly=cv2.fillPoly(img_poly, pts =co_text, color=(1,1,1))
+                    img_poly=cv2.fillPoly(img_poly, pts =co_img, color=(2,2,2))
+                    img_poly=cv2.fillPoly(img_poly, pts =co_sep, color=(3,3,3))

                try: 
                    cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly )
@ -1043,9 +1484,9 @@ class pagexml2word:

            #except:
                #pass
-    def run(self):
+    def run(self,config_params=None):
+        # config_params is forwarded unchanged to get_images_of_ground_truth.
+        # It defaults to None so that existing `run()` calls without a layout
+        # configuration remain valid.
        self.get_content_of_dir()
-        self.get_images_of_ground_truth()
+        self.get_images_of_ground_truth(config_params)
        
        
@click.command()
@ -1061,6 +1502,14 @@ class pagexml2word:
    help="directory where ground truth images would be written",
    type=click.Path(exists=True, file_okay=False),
 )
+
+@click.option(
+    "--layout_config",
+    "-lc",
+    help="path to a JSON file that maps layout region types to label values (see custom_config_page2label.json).",
+    type=click.Path(exists=True, dir_okay=False),
+)
+
@click.option(
    "--type_output",
    "-to",
@ -1072,9 +1521,16 @@ class pagexml2word:
    help="experiment of ineterst. Word , textline , glyph and textregion are desired options.",
 )

-def main(dir_xml,dir_out,type_output,experiment):
-    x=pagexml2word(dir_xml,dir_out,type_output,experiment)
-    x.run()
+
+def main(dir_xml,dir_out,type_output,experiment,layout_config):
+    # Command-line entry point: optionally load the JSON layout configuration,
+    # then build the converter and run it.
+    # NOTE: documented with comments rather than a docstring on purpose --
+    # click would display a docstring as the command's --help text.
+    config_params = None
+    if layout_config:
+        # layout_config is the path given via --layout_config/-lc; the parsed
+        # dict is what get_images_of_ground_truth reads its region keys from.
+        with open(layout_config, encoding="utf-8") as f:
+            config_params = json.load(f)
+    x=pagexml2word(dir_xml,dir_out,type_output,experiment, layout_config)
+    x.run(config_params)
 if __name__=="__main__":
    main()