From dc356a5f426f71ab9b036b6acfd1fd7eb815e8ff Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 6 Jun 2024 18:55:22 +0200 Subject: [PATCH] just defined graphic region types can be extracted as label --- gt_gen_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gt_gen_utils.py b/gt_gen_utils.py index 86eb0a1..c2360fc 100644 --- a/gt_gen_utils.py +++ b/gt_gen_utils.py @@ -327,6 +327,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ co_text = {'drop-capital':[], "footnote":[], "footnote-continued":[], "heading":[], "signature-mark":[], "header":[], "catch-word":[], "page-number":[], "marginalia":[], "paragraph":[]} all_defined_textregion_types = list(co_text.keys()) co_graphic = {"handwritten-annotation":[], "decoration":[], "stamp":[], "signature":[]} + all_defined_graphic_types = list(co_graphic.keys()) co_sep=[] co_img=[] co_table=[] @@ -425,7 +426,8 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ else: if "type" in nn.attrib: - c_t_in_graphic[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + if nn.attrib['type'] in all_defined_graphic_types: + c_t_in_graphic[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) break else: @@ -450,8 +452,9 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ else: if "type" in nn.attrib: - c_t_in_graphic[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] ) - sumi+=1 + if nn.attrib['type'] in all_defined_graphic_types: + c_t_in_graphic[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] ) + sumi+=1 elif vv.tag!=link+'Point' and sumi>=1: break