From 1c8873ffa3bb5fdf53bc23babca62268a3f04d73 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 6 Jun 2024 18:45:47 +0200 Subject: [PATCH] just defined textregion types can be extracted as label --- gt_gen_utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gt_gen_utils.py b/gt_gen_utils.py index 38e77e8..86eb0a1 100644 --- a/gt_gen_utils.py +++ b/gt_gen_utils.py @@ -325,6 +325,7 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ region_tags=np.unique([x for x in alltags if x.endswith('Region')]) co_text = {'drop-capital':[], "footnote":[], "footnote-continued":[], "heading":[], "signature-mark":[], "header":[], "catch-word":[], "page-number":[], "marginalia":[], "paragraph":[]} + all_defined_textregion_types = list(co_text.keys()) co_graphic = {"handwritten-annotation":[], "decoration":[], "stamp":[], "signature":[]} co_sep=[] co_img=[] @@ -359,7 +360,8 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ else: if "type" in nn.attrib: - c_t_in[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + if nn.attrib['type'] in all_defined_textregion_types: + c_t_in[nn.attrib['type']].append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) break else: @@ -384,8 +386,9 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ else: if "type" in nn.attrib: - c_t_in[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] ) - sumi+=1 + if nn.attrib['type'] in all_defined_textregion_types: + c_t_in[nn.attrib['type']].append( [ int(float(vv.attrib['x'])) , int(float(vv.attrib['y'])) ] ) + sumi+=1 elif vv.tag!=link+'Point' and sumi>=1: