|
|
|
@ -180,7 +180,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
if vv.tag == link + 'Point':
|
|
|
|
|
c_t_in.append([int(np.float(vv.attrib['x'])), int(np.float(vv.attrib['y']))])
|
|
|
|
|
c_t_in.append([int(float(vv.attrib['x'])), int(float(vv.attrib['y']))])
|
|
|
|
|
sumi += 1
|
|
|
|
|
elif vv.tag != link + 'Point' and sumi >= 1:
|
|
|
|
|
break
|
|
|
|
@ -226,7 +226,6 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
types_text_dict = config_params['textregions']
|
|
|
|
|
types_text = list(types_text_dict.keys())
|
|
|
|
|
types_text_label = list(types_text_dict.values())
|
|
|
|
|
print(types_text)
|
|
|
|
|
if 'graphicregions' in keys:
|
|
|
|
|
types_graphic_dict = config_params['graphicregions']
|
|
|
|
|
types_graphic = list(types_graphic_dict.keys())
|
|
|
|
@ -235,41 +234,20 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
|
|
|
|
|
labels_rgb_color = [ (0,0,0), (255,0,0), (255,125,0), (255,0,125), (125,255,125), (125,125,0), (0,125,255), (0,125,0), (125,125,125), (255,0,255), (125,0,125), (0,255,0),(0,0,255), (0,255,255), (255,125,125), (0,125,125), (0,255,125), (255,125,255), (125,255,0)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
region_tags=np.unique([x for x in alltags if x.endswith('Region')])
|
|
|
|
|
|
|
|
|
|
co_text_paragraph=[]
|
|
|
|
|
co_text_footnote=[]
|
|
|
|
|
co_text_footnote_con=[]
|
|
|
|
|
co_text_drop=[]
|
|
|
|
|
co_text_heading=[]
|
|
|
|
|
co_text_header=[]
|
|
|
|
|
co_text_marginalia=[]
|
|
|
|
|
co_text_catch=[]
|
|
|
|
|
co_text_page_number=[]
|
|
|
|
|
co_text_signature_mark=[]
|
|
|
|
|
co_text = {'drop-capital':[], "footnote":[], "footnote-continued":[], "heading":[], "signature-mark":[], "header":[], "catch-word":[], "page-number":[], "marginalia":[], "paragraph":[]}
|
|
|
|
|
co_graphic = {"handwritten-annotation":[], "decoration":[], "stamp":[], "signature":[]}
|
|
|
|
|
co_sep=[]
|
|
|
|
|
co_img=[]
|
|
|
|
|
co_table=[]
|
|
|
|
|
co_graphic_signature=[]
|
|
|
|
|
co_graphic_text_annotation=[]
|
|
|
|
|
co_graphic_decoration=[]
|
|
|
|
|
co_graphic_stamp=[]
|
|
|
|
|
co_noise=[]
|
|
|
|
|
|
|
|
|
|
for tag in region_tags:
|
|
|
|
|
if 'textregions' in keys:
|
|
|
|
|
if tag.endswith('}TextRegion') or tag.endswith('}Textregion'):
|
|
|
|
|
for nn in root1.iter(tag):
|
|
|
|
|
c_t_in_drop=[]
|
|
|
|
|
c_t_in_paragraph=[]
|
|
|
|
|
c_t_in_heading=[]
|
|
|
|
|
c_t_in_header=[]
|
|
|
|
|
c_t_in_page_number=[]
|
|
|
|
|
c_t_in_signature_mark=[]
|
|
|
|
|
c_t_in_catch=[]
|
|
|
|
|
c_t_in_marginalia=[]
|
|
|
|
|
c_t_in_footnote=[]
|
|
|
|
|
c_t_in_footnote_con=[]
|
|
|
|
|
c_t_in = {'drop-capital':[], "footnote":[], "footnote-continued":[], "heading":[], "signature-mark":[], "header":[], "catch-word":[], "page-number":[], "marginalia":[], "paragraph":[]}
|
|
|
|
|
sumi=0
|
|
|
|
|
for vv in nn.iter():
|
|
|
|
|
# check the format of coords
|
|
|
|
@ -277,143 +255,63 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
|
|
|
|
|
coords=bool(vv.attrib)
|
|
|
|
|
if coords:
|
|
|
|
|
#print('birda1')
|
|
|
|
|
p_h=vv.attrib['points'].split(' ')
|
|
|
|
|
|
|
|
|
|
if "drop-capital" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
|
|
|
|
|
c_t_in_drop.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "footnote" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='footnote':
|
|
|
|
|
c_t_in_footnote.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "footnote-continued" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='footnote-continued':
|
|
|
|
|
c_t_in_footnote_con.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "heading" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='heading':
|
|
|
|
|
c_t_in_heading.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "signature-mark" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='signature-mark':
|
|
|
|
|
c_t_in_signature_mark.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "header" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='header':
|
|
|
|
|
c_t_in_header.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "catch-word" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='catch-word':
|
|
|
|
|
c_t_in_catch.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "page-number" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='page-number':
|
|
|
|
|
c_t_in_page_number.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "marginalia" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='marginalia':
|
|
|
|
|
c_t_in_marginalia.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "paragraph" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='paragraph':
|
|
|
|
|
c_t_in_paragraph.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "rest_as_paragraph" in types_text:
|
|
|
|
|
types_text_without_paragraph = [element for element in types_text if element!='rest_as_paragraph' and element!='paragraph']
|
|
|
|
|
if len(types_text_without_paragraph) == 0:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
c_t_in['paragraph'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
elif len(types_text_without_paragraph) >= 1:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
if nn.attrib['type'] in types_text_without_paragraph:
|
|
|
|
|
c_t_in[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
else:
|
|
|
|
|
c_t_in['paragraph'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
c_t_in[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
break
|
|
|
|
|
else:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
if "drop-capital" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
|
|
|
|
|
c_t_in_drop.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "footnote" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='footnote':
|
|
|
|
|
c_t_in_footnote.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "footnote-continued" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='footnote-continued':
|
|
|
|
|
c_t_in_footnote_con.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "heading" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='heading':
|
|
|
|
|
c_t_in_heading.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "signature-mark" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='signature-mark':
|
|
|
|
|
c_t_in_signature_mark.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "header" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='header':
|
|
|
|
|
c_t_in_header.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "catch-word" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='catch-word':
|
|
|
|
|
c_t_in_catch.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "page-number" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='page-number':
|
|
|
|
|
c_t_in_page_number.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "marginalia" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='marginalia':
|
|
|
|
|
c_t_in_marginalia.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "paragraph" in types_text:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='paragraph':
|
|
|
|
|
c_t_in_paragraph.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
if "rest_as_paragraph" in types_text:
|
|
|
|
|
types_text_without_paragraph = [element for element in types_text if element!='rest_as_paragraph' and element!='paragraph']
|
|
|
|
|
if len(types_text_without_paragraph) == 0:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
c_t_in['paragraph'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
|
|
|
|
|
sumi+=1
|
|
|
|
|
elif len(types_text_without_paragraph) >= 1:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
if nn.attrib['type'] in types_text_without_paragraph:
|
|
|
|
|
c_t_in[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
|
|
|
|
|
sumi+=1
|
|
|
|
|
else:
|
|
|
|
|
c_t_in['paragraph'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
c_t_in[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
if len(c_t_in_drop)>0:
|
|
|
|
|
co_text_drop.append(np.array(c_t_in_drop))
|
|
|
|
|
if len(c_t_in_footnote_con)>0:
|
|
|
|
|
co_text_footnote_con.append(np.array(c_t_in_footnote_con))
|
|
|
|
|
if len(c_t_in_footnote)>0:
|
|
|
|
|
co_text_footnote.append(np.array(c_t_in_footnote))
|
|
|
|
|
if len(c_t_in_paragraph)>0:
|
|
|
|
|
co_text_paragraph.append(np.array(c_t_in_paragraph))
|
|
|
|
|
if len(c_t_in_heading)>0:
|
|
|
|
|
co_text_heading.append(np.array(c_t_in_heading))
|
|
|
|
|
|
|
|
|
|
if len(c_t_in_header)>0:
|
|
|
|
|
co_text_header.append(np.array(c_t_in_header))
|
|
|
|
|
if len(c_t_in_page_number)>0:
|
|
|
|
|
co_text_page_number.append(np.array(c_t_in_page_number))
|
|
|
|
|
if len(c_t_in_catch)>0:
|
|
|
|
|
co_text_catch.append(np.array(c_t_in_catch))
|
|
|
|
|
|
|
|
|
|
if len(c_t_in_signature_mark)>0:
|
|
|
|
|
co_text_signature_mark.append(np.array(c_t_in_signature_mark))
|
|
|
|
|
|
|
|
|
|
if len(c_t_in_marginalia)>0:
|
|
|
|
|
co_text_marginalia.append(np.array(c_t_in_marginalia))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for element_text in list(c_t_in.keys()):
|
|
|
|
|
if len(c_t_in[element_text])>0:
|
|
|
|
|
co_text[element_text].append(np.array(c_t_in[element_text]))
|
|
|
|
|
|
|
|
|
|
if 'graphicregions' in keys:
|
|
|
|
|
if tag.endswith('}GraphicRegion') or tag.endswith('}graphicregion'):
|
|
|
|
|
#print('sth')
|
|
|
|
|
for nn in root1.iter(tag):
|
|
|
|
|
c_t_in_stamp=[]
|
|
|
|
|
c_t_in_text_annotation=[]
|
|
|
|
|
c_t_in_decoration=[]
|
|
|
|
|
c_t_in_signature=[]
|
|
|
|
|
c_t_in_graphic = {"handwritten-annotation":[], "decoration":[], "stamp":[], "signature":[]}
|
|
|
|
|
sumi=0
|
|
|
|
|
for vv in nn.iter():
|
|
|
|
|
# check the format of coords
|
|
|
|
@ -421,23 +319,22 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
coords=bool(vv.attrib)
|
|
|
|
|
if coords:
|
|
|
|
|
p_h=vv.attrib['points'].split(' ')
|
|
|
|
|
if "handwritten-annotation" in types_graphic:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
|
|
|
|
|
c_t_in_text_annotation.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "decoration" in types_graphic:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='decoration':
|
|
|
|
|
c_t_in_decoration.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "stamp" in types_graphic:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='stamp':
|
|
|
|
|
c_t_in_stamp.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
if "signature" in types_graphic:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='signature':
|
|
|
|
|
c_t_in_signature.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if "rest_as_decoration" in types_graphic:
|
|
|
|
|
types_graphic_without_decoration = [element for element in types_graphic if element!='rest_as_decoration' and element!='decoration']
|
|
|
|
|
if len(types_graphic_without_decoration) == 0:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
c_t_in_graphic['decoration'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
elif len(types_graphic_without_decoration) >= 1:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
if nn.attrib['type'] in types_graphic_without_decoration:
|
|
|
|
|
c_t_in_graphic[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
else:
|
|
|
|
|
c_t_in_graphic['decoration'].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
c_t_in_graphic[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) )
|
|
|
|
|
|
|
|
|
|
break
|
|
|
|
|
else:
|
|
|
|
@ -445,34 +342,33 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
if "handwritten-annotation" in types_graphic:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
|
|
|
|
|
c_t_in_text_annotation.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
if "rest_as_decoration" in types_graphic:
|
|
|
|
|
types_graphic_without_decoration = [element for element in types_graphic if element!='rest_as_decoration' and element!='decoration']
|
|
|
|
|
if len(types_graphic_without_decoration) == 0:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
c_t_in_graphic['decoration'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
|
|
|
|
|
sumi+=1
|
|
|
|
|
elif len(types_graphic_without_decoration) >= 1:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
if nn.attrib['type'] in types_graphic_without_decoration:
|
|
|
|
|
c_t_in_graphic[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
|
|
|
|
|
sumi+=1
|
|
|
|
|
else:
|
|
|
|
|
c_t_in_graphic['decoration'].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
if "type" in nn.attrib:
|
|
|
|
|
c_t_in_graphic[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] )
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "decoration" in types_graphic:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='decoration':
|
|
|
|
|
c_t_in_decoration.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "stamp" in types_graphic:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='stamp':
|
|
|
|
|
c_t_in_stamp.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if "signature" in types_graphic:
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='signature':
|
|
|
|
|
c_t_in_signature.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
for element_graphic in list(c_t_in_graphic.keys()):
|
|
|
|
|
if len(c_t_in_graphic[element_graphic])>0:
|
|
|
|
|
co_graphic[element_graphic].append(np.array(c_t_in_graphic[element_graphic]))
|
|
|
|
|
|
|
|
|
|
if len(c_t_in_text_annotation)>0:
|
|
|
|
|
co_graphic_text_annotation.append(np.array(c_t_in_text_annotation))
|
|
|
|
|
if len(c_t_in_decoration)>0:
|
|
|
|
|
co_graphic_decoration.append(np.array(c_t_in_decoration))
|
|
|
|
|
if len(c_t_in_stamp)>0:
|
|
|
|
|
co_graphic_stamp.append(np.array(c_t_in_stamp))
|
|
|
|
|
if len(c_t_in_signature)>0:
|
|
|
|
|
co_graphic_signature.append(np.array(c_t_in_signature))
|
|
|
|
|
|
|
|
|
|
if 'imageregion' in keys:
|
|
|
|
|
if tag.endswith('}ImageRegion') or tag.endswith('}imageregion'):
|
|
|
|
@ -491,7 +387,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
@ -517,7 +413,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
@ -545,7 +441,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
#print(vv.tag,'in')
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
@ -571,7 +467,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
#print(vv.tag,'in')
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
@ -583,59 +479,63 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
if "paragraph" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 2
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_paragraph, img_boundary = update_region_contours(co_text_paragraph, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text['paragraph'], img_boundary = update_region_contours(co_text['paragraph'], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "drop-capital" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 0
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_drop, img_boundary = update_region_contours(co_text_drop, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["drop-capital"], img_boundary = update_region_contours(co_text["drop-capital"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "catch-word" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 0
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_catch, img_boundary = update_region_contours(co_text_catch, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["catch-word"], img_boundary = update_region_contours(co_text["catch-word"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "page-number" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 0
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_page_number, img_boundary = update_region_contours(co_text_page_number, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["page-number"], img_boundary = update_region_contours(co_text["page-number"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "header" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 1
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_header, img_boundary = update_region_contours(co_text_header, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["header"], img_boundary = update_region_contours(co_text["header"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "heading" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 1
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_heading, img_boundary = update_region_contours(co_text_heading, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["heading"], img_boundary = update_region_contours(co_text["heading"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "signature-mark" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 1
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_signature_mark, img_boundary = update_region_contours(co_text_signature_mark, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["signature-mark"], img_boundary = update_region_contours(co_text["signature-mark"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "marginalia" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 2
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_marginalia, img_boundary = update_region_contours(co_text_marginalia, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["marginalia"], img_boundary = update_region_contours(co_text["marginalia"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "footnote" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 2
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_footnote, img_boundary = update_region_contours(co_text_footnote, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["footnote"], img_boundary = update_region_contours(co_text["footnote"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
if "footnote-continued" in elements_with_artificial_class:
|
|
|
|
|
erosion_rate = 2
|
|
|
|
|
dilation_rate = 4
|
|
|
|
|
co_text_footnote_con, img_boundary = update_region_contours(co_text_footnote_con, img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
co_text["footnote-continued"], img_boundary = update_region_contours(co_text["footnote-continued"], img_boundary, erosion_rate, dilation_rate, y_len, x_len )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
img = np.zeros( (y_len,x_len,3) )
|
|
|
|
|
|
|
|
|
|
if output_type == '3d':
|
|
|
|
|
|
|
|
|
|
if 'graphicregions' in keys:
|
|
|
|
|
if "handwritten-annotation" in types_graphic:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic_text_annotation, color=labels_rgb_color[ config_params['graphicregions']['handwritten-annotation']])
|
|
|
|
|
if "signature" in types_graphic:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic_signature, color=labels_rgb_color[ config_params['graphicregions']['signature']])
|
|
|
|
|
if "decoration" in types_graphic:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic_decoration, color=labels_rgb_color[ config_params['graphicregions']['decoration']])
|
|
|
|
|
if "stamp" in types_graphic:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic_stamp, color=labels_rgb_color[ config_params['graphicregions']['stamp']])
|
|
|
|
|
if 'rest_as_decoration' in types_graphic:
|
|
|
|
|
types_graphic[types_graphic=='rest_as_decoration'] = 'decoration'
|
|
|
|
|
for element_graphic in types_graphic:
|
|
|
|
|
if element_graphic == 'decoration':
|
|
|
|
|
color_label = labels_rgb_color[ config_params['graphicregions']['rest_as_decoration']]
|
|
|
|
|
else:
|
|
|
|
|
color_label = labels_rgb_color[ config_params['graphicregions'][element_graphic]]
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
|
|
|
|
|
else:
|
|
|
|
|
for element_graphic in types_graphic:
|
|
|
|
|
color_label = labels_rgb_color[ config_params['graphicregions'][element_graphic]]
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if 'imageregion' in keys:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_img, color=labels_rgb_color[ config_params['imageregion']])
|
|
|
|
@ -647,26 +547,19 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_noise, color=labels_rgb_color[ config_params['noiseregion']])
|
|
|
|
|
|
|
|
|
|
if 'textregions' in keys:
|
|
|
|
|
if "paragraph" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=labels_rgb_color[ config_params['textregions']['paragraph']])
|
|
|
|
|
if "footnote" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_footnote, color=labels_rgb_color[ config_params['textregions']['footnote']])
|
|
|
|
|
if "footnote-continued" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_footnote_con, color=labels_rgb_color[ config_params['textregions']['footnote-continued']])
|
|
|
|
|
if "heading" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_heading, color=labels_rgb_color[ config_params['textregions']['heading']])
|
|
|
|
|
if "header" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_header, color=labels_rgb_color[ config_params['textregions']['header']])
|
|
|
|
|
if "catch-word" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_catch, color=labels_rgb_color[ config_params['textregions']['catch-word']])
|
|
|
|
|
if "signature-mark" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_signature_mark, color=labels_rgb_color[ config_params['textregions']['signature-mark']])
|
|
|
|
|
if "page-number" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_page_number, color=labels_rgb_color[ config_params['textregions']['page-number']])
|
|
|
|
|
if "marginalia" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=labels_rgb_color[ config_params['textregions']['marginalia']])
|
|
|
|
|
if "drop-capital" in types_text:
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_drop, color=labels_rgb_color[ config_params['textregions']['drop-capital']])
|
|
|
|
|
if 'rest_as_paragraph' in types_text:
|
|
|
|
|
types_text[types_text=='rest_as_paragraph'] = 'paragraph'
|
|
|
|
|
for element_text in types_text:
|
|
|
|
|
if element_text == 'paragraph':
|
|
|
|
|
color_label = labels_rgb_color[ config_params['textregions']['rest_as_paragraph']]
|
|
|
|
|
else:
|
|
|
|
|
color_label = labels_rgb_color[ config_params['textregions'][element_text]]
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
|
|
|
|
|
else:
|
|
|
|
|
for element_text in types_text:
|
|
|
|
|
color_label = labels_rgb_color[ config_params['textregions'][element_text]]
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if "artificial_class_on_boundry" in keys:
|
|
|
|
|
img_poly[:,:,0][img_boundary[:,:]==1] = artificial_class_rgb_color[0]
|
|
|
|
@ -678,18 +571,19 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
|
|
|
|
|
elif output_type == '2d':
|
|
|
|
|
if 'graphicregions' in keys:
|
|
|
|
|
if "handwritten-annotation" in types_graphic:
|
|
|
|
|
color_label = config_params['graphicregions']['handwritten-annotation']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic_text_annotation, color=(color_label,color_label,color_label))
|
|
|
|
|
if "signature" in types_graphic:
|
|
|
|
|
color_label = config_params['graphicregions']['signature']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic_signature, color=(color_label,color_label,color_label))
|
|
|
|
|
if "decoration" in types_graphic:
|
|
|
|
|
color_label = config_params['graphicregions']['decoration']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic_decoration, color=(color_label,color_label,color_label))
|
|
|
|
|
if "stamp" in types_graphic:
|
|
|
|
|
color_label = config_params['graphicregions']['stamp']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic_stamp, color=(color_label,color_label,color_label))
|
|
|
|
|
if 'rest_as_decoration' in types_graphic:
|
|
|
|
|
types_graphic[types_graphic=='rest_as_decoration'] = 'decoration'
|
|
|
|
|
for element_graphic in types_graphic:
|
|
|
|
|
if element_graphic == 'decoration':
|
|
|
|
|
color_label = config_params['graphicregions']['rest_as_decoration']
|
|
|
|
|
else:
|
|
|
|
|
color_label = config_params['graphicregions'][element_graphic]
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
|
|
|
|
|
else:
|
|
|
|
|
for element_graphic in types_graphic:
|
|
|
|
|
color_label = config_params['graphicregions'][element_graphic]
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_graphic[element_graphic], color=color_label)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if 'imageregion' in keys:
|
|
|
|
|
color_label = config_params['imageregion']
|
|
|
|
@ -705,36 +599,18 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_noise, color=(color_label,color_label,color_label))
|
|
|
|
|
|
|
|
|
|
if 'textregions' in keys:
|
|
|
|
|
if "paragraph" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['paragraph']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_paragraph, color=(color_label,color_label,color_label))
|
|
|
|
|
if "footnote" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['footnote']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_footnote, color=(color_label,color_label,color_label))
|
|
|
|
|
if "footnote-continued" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['footnote-continued']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_footnote_con, color=(color_label,color_label,color_label))
|
|
|
|
|
if "heading" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['heading']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_heading, color=(color_label,color_label,color_label))
|
|
|
|
|
if "header" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['header']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_header, color=(color_label,color_label,color_label))
|
|
|
|
|
if "catch-word" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['catch-word']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_catch, color=(color_label,color_label,color_label))
|
|
|
|
|
if "signature-mark" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['signature-mark']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_signature_mark, color=(color_label,color_label,color_label))
|
|
|
|
|
if "page-number" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['page-number']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_page_number, color=(color_label,color_label,color_label))
|
|
|
|
|
if "marginalia" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['marginalia']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_marginalia, color=(color_label,color_label,color_label))
|
|
|
|
|
if "drop-capital" in types_text:
|
|
|
|
|
color_label = config_params['textregions']['drop-capital']
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text_drop, color=(color_label,color_label,color_label))
|
|
|
|
|
if 'rest_as_paragraph' in types_text:
|
|
|
|
|
types_text[types_text=='rest_as_paragraph'] = 'paragraph'
|
|
|
|
|
for element_text in types_text:
|
|
|
|
|
if element_text == 'paragraph':
|
|
|
|
|
color_label = config_params['textregions']['rest_as_paragraph']
|
|
|
|
|
else:
|
|
|
|
|
color_label = config_params['textregions'][element_text]
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
|
|
|
|
|
else:
|
|
|
|
|
for element_text in types_text:
|
|
|
|
|
color_label = config_params['textregions'][element_text]
|
|
|
|
|
img_poly=cv2.fillPoly(img, pts =co_text[element_text], color=color_label)
|
|
|
|
|
|
|
|
|
|
if "artificial_class_on_boundry" in keys:
|
|
|
|
|
img_poly[:,:][img_boundary[:,:]==1] = artificial_class_label
|
|
|
|
@ -947,51 +823,51 @@ def read_xml(xml_file):
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='drop-capital':
|
|
|
|
|
#if nn.attrib['type']=='paragraph':
|
|
|
|
|
|
|
|
|
|
c_t_in_drop.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_drop.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
elif "type" in nn.attrib and nn.attrib['type']=='heading':
|
|
|
|
|
id_heading.append(nn.attrib['id'])
|
|
|
|
|
c_t_in_heading.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_heading.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
elif "type" in nn.attrib and nn.attrib['type']=='signature-mark':
|
|
|
|
|
|
|
|
|
|
c_t_in_signature_mark.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_signature_mark.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
#print(c_t_in_paragraph)
|
|
|
|
|
sumi+=1
|
|
|
|
|
elif "type" in nn.attrib and nn.attrib['type']=='header':
|
|
|
|
|
id_header.append(nn.attrib['id'])
|
|
|
|
|
c_t_in_header.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_header.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
elif "type" in nn.attrib and nn.attrib['type']=='catch-word':
|
|
|
|
|
c_t_in_catch.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_catch.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
elif "type" in nn.attrib and nn.attrib['type']=='page-number':
|
|
|
|
|
|
|
|
|
|
c_t_in_page_number.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_page_number.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
#print(c_t_in_paragraph)
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
elif "type" in nn.attrib and nn.attrib['type']=='marginalia':
|
|
|
|
|
id_marginalia.append(nn.attrib['id'])
|
|
|
|
|
|
|
|
|
|
c_t_in_marginalia.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_marginalia.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
#print(c_t_in_paragraph)
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
id_paragraph.append(nn.attrib['id'])
|
|
|
|
|
c_t_in_paragraph.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_paragraph.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
#print(c_t_in_paragraph)
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
#c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
#c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
|
|
|
|
|
#print(vv.tag,'in')
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
@ -1057,16 +933,16 @@ def read_xml(xml_file):
|
|
|
|
|
if "type" in nn.attrib and nn.attrib['type']=='handwritten-annotation':
|
|
|
|
|
#if nn.attrib['type']=='paragraph':
|
|
|
|
|
|
|
|
|
|
c_t_in_text_annotation.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_text_annotation.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
elif "type" in nn.attrib and nn.attrib['type']=='decoration':
|
|
|
|
|
|
|
|
|
|
c_t_in_decoration.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in_decoration.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
#print(c_t_in_paragraph)
|
|
|
|
|
sumi+=1
|
|
|
|
|
else:
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
|
|
|
|
|
if len(c_t_in_text_annotation)>0:
|
|
|
|
@ -1096,7 +972,7 @@ def read_xml(xml_file):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
#print(vv.tag,'in')
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
@ -1123,7 +999,7 @@ def read_xml(xml_file):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
#print(vv.tag,'in')
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
@ -1150,7 +1026,7 @@ def read_xml(xml_file):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
#print(vv.tag,'in')
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
@ -1176,7 +1052,7 @@ def read_xml(xml_file):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if vv.tag==link+'Point':
|
|
|
|
|
c_t_in.append([ int(np.float(vv.attrib['x'])) , int(np.float(vv.attrib['y'])) ])
|
|
|
|
|
c_t_in.append([ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ])
|
|
|
|
|
sumi+=1
|
|
|
|
|
#print(vv.tag,'in')
|
|
|
|
|
elif vv.tag!=link+'Point' and sumi>=1:
|
|
|
|
|