From 4b7f7da07c96d410d0decb4b6879d5586132fdab Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 23 May 2024 17:14:31 +0200 Subject: [PATCH] use cases like textline, word and glyph are added --- custom_config_page2label.json | 11 +- pagexml2label.py | 1055 +++------------------------------ 2 files changed, 93 insertions(+), 973 deletions(-) diff --git a/custom_config_page2label.json b/custom_config_page2label.json index 254f4df..d6320fa 100644 --- a/custom_config_page2label.json +++ b/custom_config_page2label.json @@ -1,8 +1,9 @@ { -"textregions":{"paragraph":1, "heading": 1, "header":1,"drop-capital": 1, "marginalia":1 ,"page-number":1 , "catch-word":1 ,"footnote": 1, "footnote-continued": 1}, -"imageregion":2, -"separatorregion":3, -"graphicregions" :{"handwritten-annotation":2, "decoration": 2, "signature": 2, "stamp": 2}, +"use_case": "layout", +"textregions":{"paragraph":1, "heading": 2, "header":2,"drop-capital": 3, "marginalia":4 ,"page-number":1 , "catch-word":1 ,"footnote": 1, "footnote-continued": 1}, +"imageregion":5, +"separatorregion":6, +"graphicregions" :{"handwritten-annotation":5, "decoration": 5, "signature": 5, "stamp": 5}, "artificial_class_on_boundry": ["paragraph","header", "heading", "marginalia", "page-number", "catch-word", "drop-capital","footnote", "footnote-continued"], -"artificial_class_label":4 +"artificial_class_label":7 } diff --git a/pagexml2label.py b/pagexml2label.py index 63b7acf..16cda8b 100644 --- a/pagexml2label.py +++ b/pagexml2label.py @@ -21,13 +21,12 @@ This classes.txt file is required for dhsegment tool. """ KERNEL = np.ones((5, 5), np.uint8) -class pagexml2word: - def __init__(self,dir_in, out_dir,output_type,experiment,layout_config): +class pagexml2label: + def __init__(self,dir_in, out_dir,output_type,config): self.dir=dir_in self.output_dir=out_dir self.output_type=output_type - self.experiment=experiment - self.layout_config=layout_config + self.config=config def get_content_of_dir(self): """ @@ -127,7 +126,82 @@ class pagexml2word: y_len=int(jj.attrib['imageHeight']) x_len=int(jj.attrib['imageWidth']) - if self.layout_config: + if self.config and (config_params['use_case']=='textline' or config_params['use_case']=='word' or config_params['use_case']=='glyph'): + keys = list(config_params.keys()) + if "artificial_class_label" in keys: + artificial_class_rgb_color = (255,255,0) + artificial_class_label = config_params['artificial_class_label'] + + textline_rgb_color = (255, 0, 0) + + if config_params['use_case']=='textline': + region_tags = np.unique([x for x in alltags if x.endswith('TextLine')]) + elif config_params['use_case']=='word': + region_tags = np.unique([x for x in alltags if x.endswith('Word')]) + elif config_params['use_case']=='glyph': + region_tags = np.unique([x for x in alltags if x.endswith('Glyph')]) + co_use_case = [] + + for tag in region_tags: + if config_params['use_case']=='textline': + tag_endings = ['}TextLine','}textline'] + elif config_params['use_case']=='word': + tag_endings = ['}Word','}word'] + elif config_params['use_case']=='glyph': + tag_endings = ['}Glyph','}glyph'] + + if tag.endswith(tag_endings[0]) or tag.endswith(tag_endings[1]): + for nn in root1.iter(tag): + c_t_in = [] + sumi = 0 + for vv in nn.iter(): + # check the format of coords + if vv.tag == link + 'Coords': + coords = bool(vv.attrib) + if coords: + p_h = vv.attrib['points'].split(' ') + c_t_in.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) + break + else: + pass + + if vv.tag == link + 'Point': + c_t_in.append([int(np.float(vv.attrib['x'])), int(np.float(vv.attrib['y']))]) + sumi += 1 + elif vv.tag != link + 'Point' and sumi >= 1: + break + co_use_case.append(np.array(c_t_in)) + + + + if "artificial_class_label" in keys: + img_boundary = np.zeros((y_len, x_len)) + erosion_rate = 1 + dilation_rate = 3 + co_use_case, img_boundary = self.update_region_contours(co_use_case, img_boundary, erosion_rate, dilation_rate, y_len, x_len ) + + + img = np.zeros((y_len, x_len, 3)) + if self.output_type == '2d': + img_poly = cv2.fillPoly(img, pts=co_use_case, color=(1, 1, 1)) + if "artificial_class_label" in keys: + img_poly[:,:][img_boundary[:,:]==1] = artificial_class_label + elif self.output_type == '3d': + img_poly = cv2.fillPoly(img, pts=co_use_case, color=textline_rgb_color) + if "artificial_class_label" in keys: + img_poly[:,:,0][img_boundary[:,:]==1] = artificial_class_rgb_color[0] + img_poly[:,:,1][img_boundary[:,:]==1] = artificial_class_rgb_color[1] + img_poly[:,:,2][img_boundary[:,:]==1] = artificial_class_rgb_color[2] + + try: + cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('-')[1].split('.')[0] + '.png', + img_poly) + except: + cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('.')[0] + '.png', img_poly) + + + if self.config and config_params['use_case']=='layout': keys = list(config_params.keys()) if "artificial_class_on_boundry" in keys: elements_with_artificial_class = list(config_params['artificial_class_on_boundry']) @@ -139,6 +213,7 @@ class pagexml2word: types_text_dict = config_params['textregions'] types_text = list(types_text_dict.keys()) types_text_label = list(types_text_dict.values()) + print(types_text) if 'graphicregions' in keys: types_graphic_dict = config_params['graphicregions'] types_graphic = list(types_graphic_dict.keys()) @@ -660,957 +735,6 @@ class pagexml2word: cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - #print(values[0]) - if self.experiment=='word': - region_tags=np.unique([x for x in alltags if x.endswith('Word')]) - co_word=[] - - for tag in region_tags: - if tag.endswith('}Word') or tag.endswith('}word'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_word.append(np.array(c_t_in)) - - img = np.zeros( (y_len,x_len, 3) ) - if self.output_type == '2d': - img_poly=cv2.fillPoly(img, pts =co_word, color=(1,1,1)) - elif self.output_type == '3d': - img_poly=cv2.fillPoly(img, pts =co_word, color=(255,0,0)) - - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - - - elif self.experiment=='glyph': - region_tags=np.unique([x for x in alltags if x.endswith('Glyph')]) - co_glyph=[] - - for tag in region_tags: - if tag.endswith('}Glyph') or tag.endswith('}glyph'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_glyph.append(np.array(c_t_in)) - - img = np.zeros( (y_len,x_len, 3) ) - if self.output_type == '2d': - img_poly=cv2.fillPoly(img, pts =co_glyph, color=(1,1,1)) - elif self.output_type == '3d': - img_poly=cv2.fillPoly(img, pts =co_glyph, color=(255,0,0)) - - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - - elif self.experiment=='textline': - region_tags=np.unique([x for x in alltags if x.endswith('TextLine')]) - co_line=[] - - for tag in region_tags: - if tag.endswith('}TextLine') or tag.endswith('}textline'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_line.append(np.array(c_t_in)) - - img = np.zeros( (y_len,x_len, 3) ) - if self.output_type == '2d': - img_poly=cv2.fillPoly(img, pts =co_line, color=(1,1,1)) - elif self.output_type == '3d': - img_poly=cv2.fillPoly(img, pts =co_line, color=(255,0,0)) - - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - - elif self.experiment == 'textline_new_concept': - region_tags = np.unique([x for x in alltags if x.endswith('TextLine')]) - co_line = [] - - for tag in region_tags: - if tag.endswith('}TextLine') or tag.endswith('}textline'): - # print('sth') - for nn in root1.iter(tag): - c_t_in = [] - sumi = 0 - for vv in nn.iter(): - # check the format of coords - if vv.tag == link + 'Coords': - coords = bool(vv.attrib) - if coords: - p_h = vv.attrib['points'].split(' ') - c_t_in.append( - np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) - break - else: - pass - - if vv.tag == link + 'Point': - c_t_in.append([int(np.float(vv.attrib['x'])), int(np.float(vv.attrib['y']))]) - sumi += 1 - # print(vv.tag,'in') - elif vv.tag != link + 'Point' and sumi >= 1: - break - co_line.append(np.array(c_t_in)) - - img_boundary = np.zeros((y_len, x_len)) - co_textline_eroded = [] - for con in co_line: - # try: - img_boundary_in = np.zeros((y_len, x_len)) - img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1)) - # print('bidiahhhhaaa') - - # img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=7)#asiatica - img_boundary_in = cv2.erode(img_boundary_in[:, :], KERNEL, iterations=1) - - pixel = 1 - min_size = 0 - con_eroded = self.return_contours_of_interested_region(img_boundary_in, pixel, min_size) - - try: - co_textline_eroded.append(con_eroded[0]) - except: - co_textline_eroded.append(con) - - img_boundary_in_dilated = cv2.dilate(img_boundary_in[:, :], KERNEL, iterations=3) - # img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=5) - - boundary = img_boundary_in_dilated[:, :] - img_boundary_in[:, :] - - img_boundary[:, :][boundary[:, :] == 1] = 1 - - img = np.zeros((y_len, x_len, 3)) - if self.output_type == '2d': - img_poly = cv2.fillPoly(img, pts=co_textline_eroded, color=(1, 1, 1)) - img_poly[:, :][img_boundary[:, :] == 1] = 2 - elif self.output_type == '3d': - img_poly = cv2.fillPoly(img, pts=co_textline_eroded, color=(255, 0, 0)) - img_poly[:, :, 0][img_boundary[:, :] == 1] = 255 - img_poly[:, :, 1][img_boundary[:, :] == 1] = 125 - img_poly[:, :, 2][img_boundary[:, :] == 1] = 125 - - try: - cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('-')[1].split('.')[0] + '.png', - img_poly) - except: - cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('.')[0] + '.png', img_poly) - - elif self.experiment=='layout_for_main_regions': - region_tags=np.unique([x for x in alltags if x.endswith('Region')]) - #print(region_tags) - co_text=[] - co_sep=[] - co_img=[] - #co_graphic=[] - - for tag in region_tags: - if tag.endswith('}TextRegion') or tag.endswith('}Textregion'): - #print('sth') - for nn in root1.iter(tag): - print(nn.attrib['type']) - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_text.append(np.array(c_t_in)) - - elif tag.endswith('}ImageRegion') or tag.endswith('}GraphicRegion') or tag.endswith('}imageregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_img.append(np.array(c_t_in)) - - elif tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_sep.append(np.array(c_t_in)) - - img_poly = np.zeros( (y_len,x_len,3) ) - - - if self.output_type == '3d': - img_poly=cv2.fillPoly(img_poly, pts =co_text, color=(255,0,0)) - img_poly=cv2.fillPoly(img_poly, pts =co_img, color=(0,255,0)) - img_poly=cv2.fillPoly(img_poly, pts =co_sep, color=(0,0,255)) - ##img_poly=cv2.fillPoly(img, pts =co_graphic, color=(255,125,125)) - elif self.output_type == '2d': - img_poly=cv2.fillPoly(img_poly, pts =co_text, color=(1,1,1)) - img_poly=cv2.fillPoly(img_poly, pts =co_img, color=(2,2,2)) - img_poly=cv2.fillPoly(img_poly, pts =co_sep, color=(3,3,3)) - - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - - elif self.experiment=='textregion': - region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')]) - co_textregion=[] - - for tag in region_tags: - if tag.endswith('}TextRegion') or tag.endswith('}Textregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_textregion.append(np.array(c_t_in)) - - img = np.zeros( (y_len,x_len,3) ) - if self.output_type == '3d': - img_poly=cv2.fillPoly(img, pts =co_textregion, color=(255,0,0)) - elif self.output_type == '2d': - img_poly=cv2.fillPoly(img, pts =co_textregion, color=(1,1,1)) - - - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - - elif self.experiment=='layout': - region_tags=np.unique([x for x in alltags if x.endswith('Region')]) - - co_text_paragraph=[] - co_text_drop=[] - co_text_heading=[] - co_text_header=[] - co_text_marginalia=[] - co_text_catch=[] - co_text_page_number=[] - co_text_signature_mark=[] - co_sep=[] - co_img=[] - co_table=[] - co_graphic=[] - co_graphic_text_annotation=[] - co_graphic_decoration=[] - co_noise=[] - - for tag in region_tags: - if tag.endswith('}TextRegion') or tag.endswith('}Textregion'): - for nn in root1.iter(tag): - c_t_in_drop=[] - c_t_in_paragraph=[] - c_t_in_heading=[] - c_t_in_header=[] - c_t_in_page_number=[] - c_t_in_signature_mark=[] - c_t_in_catch=[] - c_t_in_marginalia=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - - coords=bool(vv.attrib) - if coords: - #print('birda1') - p_h=vv.attrib['points'].split(' ') - - - - if "type" in nn.attrib and nn.attrib['type']=='drop-capital': - #if nn.attrib['type']=='paragraph': - - c_t_in_drop.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - elif "type" in nn.attrib and nn.attrib['type']=='heading': - c_t_in_heading.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - - elif "type" in nn.attrib and nn.attrib['type']=='signature-mark': - - c_t_in_signature_mark.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - #print(c_t_in_paragraph) - elif "type" in nn.attrib and nn.attrib['type']=='header': - c_t_in_header.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - - elif "type" in nn.attrib and nn.attrib['type']=='catch-word': - c_t_in_catch.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - - elif "type" in nn.attrib and nn.attrib['type']=='page-number': - - c_t_in_page_number.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - #print(c_t_in_paragraph) - - elif "type" in nn.attrib and nn.attrib['type']=='marginalia': - - c_t_in_marginalia.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - #print(c_t_in_paragraph) - else: - - c_t_in_paragraph.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - #print(c_t_in_paragraph) - - break - else: - pass - - - if vv.tag==link+'Point': - if "type" in nn.attrib and nn.attrib['type']=='drop-capital': - #if nn.attrib['type']=='paragraph': - - c_t_in_drop.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - - elif "type" in nn.attrib and nn.attrib['type']=='heading': - c_t_in_heading.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - - - elif "type" in nn.attrib and nn.attrib['type']=='signature-mark': - - c_t_in_signature_mark.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - #print(c_t_in_paragraph) - sumi+=1 - elif "type" in nn.attrib and nn.attrib['type']=='header': - c_t_in_header.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - - - elif "type" in nn.attrib and nn.attrib['type']=='catch-word': - c_t_in_catch.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - - - elif "type" in nn.attrib and nn.attrib['type']=='page-number': - - c_t_in_page_number.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - #print(c_t_in_paragraph) - sumi+=1 - - elif "type" in nn.attrib and nn.attrib['type']=='marginalia': - - c_t_in_marginalia.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - #print(c_t_in_paragraph) - sumi+=1 - - else: - c_t_in_paragraph.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - #print(c_t_in_paragraph) - sumi+=1 - - #c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - - if len(c_t_in_drop)>0: - co_text_drop.append(np.array(c_t_in_drop)) - if len(c_t_in_paragraph)>0: - co_text_paragraph.append(np.array(c_t_in_paragraph)) - if len(c_t_in_heading)>0: - co_text_heading.append(np.array(c_t_in_heading)) - - if len(c_t_in_header)>0: - co_text_header.append(np.array(c_t_in_header)) - if len(c_t_in_page_number)>0: - co_text_page_number.append(np.array(c_t_in_page_number)) - if len(c_t_in_catch)>0: - co_text_catch.append(np.array(c_t_in_catch)) - - if len(c_t_in_signature_mark)>0: - co_text_signature_mark.append(np.array(c_t_in_signature_mark)) - - if len(c_t_in_marginalia)>0: - co_text_marginalia.append(np.array(c_t_in_marginalia)) - - - elif tag.endswith('}GraphicRegion') or tag.endswith('}graphicregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - c_t_in_text_annotation=[] - c_t_in_decoration=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - #c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation': - #if nn.attrib['type']=='paragraph': - - c_t_in_text_annotation.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - elif "type" in nn.attrib and nn.attrib['type']=='decoration': - - c_t_in_decoration.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - #print(c_t_in_paragraph) - else: - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - - - break - else: - pass - - - if vv.tag==link+'Point': - - if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation': - #if nn.attrib['type']=='paragraph': - - c_t_in_text_annotation.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - - elif "type" in nn.attrib and nn.attrib['type']=='decoration': - - c_t_in_decoration.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - #print(c_t_in_paragraph) - sumi+=1 - else: - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - - if len(c_t_in_text_annotation)>0: - co_graphic_text_annotation.append(np.array(c_t_in_text_annotation)) - if len(c_t_in_decoration)>0: - co_graphic_decoration.append(np.array(c_t_in_decoration)) - if len(c_t_in)>0: - co_graphic.append(np.array(c_t_in)) - - - - elif tag.endswith('}ImageRegion') or tag.endswith('}imageregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_img.append(np.array(c_t_in)) - - elif tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_sep.append(np.array(c_t_in)) - - - - elif tag.endswith('}TableRegion') or tag.endswith('}tableregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_table.append(np.array(c_t_in)) - - elif tag.endswith('}NoiseRegion') or tag.endswith('}noiseregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_noise.append(np.array(c_t_in)) - - - img = np.zeros( (y_len,x_len,3) ) - - if self.output_type == '3d': - img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=(255,0,0)) - - img_poly=cv2.fillPoly(img, pts =co_text_heading, color=(255,125,0)) - img_poly=cv2.fillPoly(img, pts =co_text_header, color=(255,0,125)) - img_poly=cv2.fillPoly(img, pts =co_text_catch, color=(125,255,125)) - img_poly=cv2.fillPoly(img, pts =co_text_signature_mark, color=(125,125,0)) - img_poly=cv2.fillPoly(img, pts =co_graphic_decoration, color=(0,125,255)) - img_poly=cv2.fillPoly(img, pts =co_text_page_number, color=(0,125,0)) - img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=(125,125,125)) - img_poly=cv2.fillPoly(img, pts =co_text_drop, color=(0,125,255)) - - img_poly=cv2.fillPoly(img, pts =co_graphic_text_annotation, color=(125,0,125)) - img_poly=cv2.fillPoly(img, pts =co_img, color=(0,255,0)) - img_poly=cv2.fillPoly(img, pts =co_sep, color=(0,0,255)) - img_poly=cv2.fillPoly(img, pts =co_table, color=(0,255,255)) - img_poly=cv2.fillPoly(img, pts =co_graphic, color=(255,125,125)) - img_poly=cv2.fillPoly(img, pts =co_noise, color=(255,0,255)) - elif self.output_type == '2d': - img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=(1,1,1)) - - img_poly=cv2.fillPoly(img, pts =co_text_heading, color=(2,2,2)) - img_poly=cv2.fillPoly(img, pts =co_text_header, color=(2,2,2)) - img_poly=cv2.fillPoly(img, pts =co_text_catch, color=(3,3,3)) - img_poly=cv2.fillPoly(img, pts =co_text_signature_mark, color=(4,4,4)) - img_poly=cv2.fillPoly(img, pts =co_graphic_decoration, color=(5,5,5)) - img_poly=cv2.fillPoly(img, pts =co_text_page_number, color=(6,6,6)) - img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=(7,7,7)) - img_poly=cv2.fillPoly(img, pts =co_text_drop, color=(8,8,8)) - - img_poly=cv2.fillPoly(img, pts =co_graphic_text_annotation, color=(9,9,9)) - img_poly=cv2.fillPoly(img, pts =co_img, color=(10,10,10)) - img_poly=cv2.fillPoly(img, pts =co_sep, color=(11,11,11)) - img_poly=cv2.fillPoly(img, pts =co_table, color=(12,12,12)) - img_poly=cv2.fillPoly(img, pts =co_graphic, color=(13,13,14)) - img_poly=cv2.fillPoly(img, pts =co_noise, color=(15,15,15)) - - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - - - elif self.experiment=='layout_for_main_regions_new_concept': - region_tags=np.unique([x for x in alltags if x.endswith('Region')]) - #print(region_tags) - co_text=[] - co_sep=[] - co_img=[] - co_drop = [] - co_graphic=[] - co_table = [] - - for tag in region_tags: - if tag.endswith('}TextRegion') or tag.endswith('}Textregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - c_t_in_drop = [] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - if "type" in nn.attrib and nn.attrib['type']=='drop-capital': - c_t_in_drop.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - else: - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - if "type" in nn.attrib and nn.attrib['type']=='drop-capital': - c_t_in_drop.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - else: - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - if len(c_t_in)>0: - co_text.append(np.array(c_t_in)) - if len(c_t_in_drop)>0: - co_drop.append(np.array(c_t_in_drop)) - - elif tag.endswith('}ImageRegion') or tag.endswith('}GraphicRegion') or tag.endswith('}imageregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_img.append(np.array(c_t_in)) - - elif tag.endswith('}SeparatorRegion') or tag.endswith('}separatorregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_sep.append(np.array(c_t_in)) - - elif tag.endswith('}TableRegion') or tag.endswith('}tableregion'): - #print('sth') - for nn in root1.iter(tag): - c_t_in=[] - sumi=0 - for vv in nn.iter(): - # check the format of coords - if vv.tag==link+'Coords': - coords=bool(vv.attrib) - if coords: - p_h=vv.attrib['points'].split(' ') - c_t_in.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - break - else: - pass - - - if vv.tag==link+'Point': - c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ]) - sumi+=1 - #print(vv.tag,'in') - elif vv.tag!=link+'Point' and sumi>=1: - break - co_table.append(np.array(c_t_in)) - - img_boundary = np.zeros( (y_len,x_len) ) - - - co_text_eroded = [] - for con in co_text: - #try: - img_boundary_in = np.zeros( (y_len,x_len) ) - img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1)) - #print('bidiahhhhaaa') - - - - #img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=7)#asiatica - img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=2) - - pixel = 1 - min_size = 0 - con_eroded = self.return_contours_of_interested_region(img_boundary_in,pixel, min_size ) - - try: - co_text_eroded.append(con_eroded[0]) - except: - co_text_eroded.append(con) - - img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=4) - #img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=5) - - boundary = img_boundary_in_dilated[:,:] - img_boundary_in[:,:] - - img_boundary[:,:][boundary[:,:]==1] =1 - - - ###co_table_eroded = [] - ###for con in co_table: - ####try: - ###img_boundary_in = np.zeros( (y_len,x_len) ) - ###img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1)) - ####print('bidiahhhhaaa') - - - - #####img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=7)#asiatica - ###img_boundary_in = cv2.erode(img_boundary_in[:,:], KERNEL, iterations=2) - - ###pixel = 1 - ###min_size = 0 - ###con_eroded = self.return_contours_of_interested_region(img_boundary_in,pixel, min_size ) - - ###try: - ###co_table_eroded.append(con_eroded[0]) - ###except: - ###co_table_eroded.append(con) - - ###img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=4) - - ###boundary = img_boundary_in_dilated[:,:] - img_boundary_in[:,:] - - ###img_boundary[:,:][boundary[:,:]==1] =1 - #except: - #pass - - #for con in co_img: - #img_boundary_in = np.zeros( (y_len,x_len) ) - #img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1)) - #img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=3) - - #boundary = img_boundary_in_dilated[:,:] - img_boundary_in[:,:] - - #img_boundary[:,:][boundary[:,:]==1] =1 - - - #for con in co_sep: - - #img_boundary_in = np.zeros( (y_len,x_len) ) - #img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1)) - #img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=3) - - #boundary = img_boundary_in_dilated[:,:] - img_boundary_in[:,:] - - img_boundary[:,:][boundary[:,:]==1] =1 - for con in co_drop: - img_boundary_in = np.zeros( (y_len,x_len) ) - img_boundary_in = cv2.fillPoly(img_boundary_in, pts=[con], color=(1, 1, 1)) - img_boundary_in_dilated = cv2.dilate(img_boundary_in[:,:], KERNEL, iterations=3) - - boundary = img_boundary_in_dilated[:,:] - img_boundary_in[:,:] - - img_boundary[:,:][boundary[:,:]==1] =1 - - - img = np.zeros( (y_len,x_len,3) ) - - if self.output_type == '2d': - img_poly=cv2.fillPoly(img, pts =co_img, color=(2,2,2)) - - img_poly=cv2.fillPoly(img, pts =co_text_eroded, color=(1,1,1)) - ##img_poly=cv2.fillPoly(img, pts =co_graphic, color=(4,4,4)) - ###img_poly=cv2.fillPoly(img, pts =co_table, color=(1,1,1)) - - img_poly=cv2.fillPoly(img, pts =co_drop, color=(1,1,1)) - img_poly[:,:][img_boundary[:,:]==1] = 4 - img_poly=cv2.fillPoly(img, pts =co_sep, color=(3,3,3)) - elif self.output_type == '3d': - img_poly=cv2.fillPoly(img, pts =co_img, color=(0,255,0)) - img_poly=cv2.fillPoly(img, pts =co_text_eroded, color=(255,0,0)) - img_poly=cv2.fillPoly(img, pts =co_drop, color=(0,125,255)) - - img_poly[:,:,0][img_boundary[:,:]==1]=255 - img_poly[:,:,1][img_boundary[:,:]==1]=125 - img_poly[:,:,2][img_boundary[:,:]==1]=125 - - img_poly=cv2.fillPoly(img, pts =co_sep, color=(0,0,255)) - ##img_poly=cv2.fillPoly(img, pts =co_graphic, color=(255,125,125)) - - #print('yazdimmm',self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png') - try: - #print('yazdimmm',self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png') - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - - - - #except: - #pass def run(self,config_params): self.get_content_of_dir() self.get_images_of_ground_truth(config_params) @@ -1631,9 +755,9 @@ class pagexml2word: ) @click.option( - "--layout_config", - "-lc", - help="config file of prefered layout.", + "--config", + "-cfg", + help="config file of prefered layout or use case.", type=click.Path(exists=True, dir_okay=False), ) @@ -1642,21 +766,16 @@ class pagexml2word: "-to", help="this defines how output should be. A 2d image array or a 3d image array encoded with RGB color. Just pass 2d or 3d. The file will be saved one directory up. 2D image array is 3d but only information of one channel would be enough since all channels have the same values.", ) -@click.option( - "--experiment", - "-exp", - help="experiment of ineterst. Word , textline , glyph and textregion are desired options.", -) -def main(dir_xml,dir_out,type_output,experiment,layout_config): - if layout_config: - with open(layout_config) as f: +def main(dir_xml,dir_out,type_output,config): + if config: + with open(config) as f: config_params = json.load(f) else: print("passed") config_params = None - x=pagexml2word(dir_xml,dir_out,type_output,experiment, layout_config) + x=pagexml2label(dir_xml,dir_out,type_output, config) x.run(config_params) if __name__=="__main__": main()