import click
import sys
import os
import numpy as np
import warnings
import xml.etree.ElementTree as ET
from tqdm import tqdm
import cv2
from shapely import geometry
from pathlib import Path

# Structuring element used for every erosion/dilation in this module.
KERNEL = np.ones((5, 5), np.uint8)

# NOTE(review): ``catch_warnings`` restores the previous warning filters when
# the ``with`` block exits, so this statement does not silence warnings for
# the rest of the module. Kept unchanged; confirm whether a module-wide
# filter was intended.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

# Colours used for the '3d' (RGB) output variants.
_ARTIFICIAL_CLASS_RGB = (255, 255, 0)
_USE_CASE_RGB = (255, 0, 0)

# Maps config_params['use_case'] to the PAGE-XML element that is rendered.
_USE_CASE_TAGS = {
    'textline': 'TextLine',
    'word': 'Word',
    'glyph': 'Glyph',
    'printspace': 'PrintSpace',
}

# RGB colour per class label for the 'layout' use case with '3d' output;
# indexed by the integer label taken from the configuration.
_LAYOUT_RGB_COLORS = [
    (0, 0, 0), (255, 0, 0), (255, 125, 0), (255, 0, 125), (125, 255, 125),
    (125, 125, 0), (0, 125, 255), (0, 125, 0), (125, 125, 125),
    (255, 0, 255), (125, 0, 125), (0, 255, 0), (0, 0, 255), (0, 255, 255),
    (255, 125, 125), (0, 125, 125), (0, 255, 125), (255, 125, 255),
    (125, 255, 0),
]

# Region ``type`` values that have a contour bucket in the 'layout' use case.
_TEXT_REGION_TYPES = (
    'drop-capital', 'footnote', 'footnote-continued', 'heading',
    'signature-mark', 'header', 'catch-word', 'page-number', 'marginalia',
    'paragraph',
)
_GRAPHIC_REGION_TYPES = (
    'handwritten-annotation', 'decoration', 'stamp', 'signature',
)

# (text region type, erosion iterations, dilation iterations) applied when
# the type is listed under config key 'artificial_class_on_boundry'.
_BOUNDARY_RATES = (
    ('paragraph', 2, 4),
    ('drop-capital', 0, 4),
    ('catch-word', 0, 4),
    ('page-number', 0, 4),
    ('header', 1, 4),
    ('heading', 1, 4),
    ('signature-mark', 1, 4),
    ('marginalia', 2, 4),
    ('footnote', 2, 4),
    ('footnote-continued', 2, 4),
)

# (config key, tag suffix) of the untyped layout regions, in drawing order.
_SIMPLE_LAYOUT_REGIONS = (
    ('imageregion', '}ImageRegion'),
    ('separatorregion', '}SeparatorRegion'),
    ('tableregion', '}TableRegion'),
    ('noiseregion', '}NoiseRegion'),
)


def get_content_of_dir(dir_in):
    """
    List all ground truth PAGE-XML files in ``dir_in``.

    Returns the entries of ``dir_in`` (names only, in ``os.listdir`` order)
    whose last dot-separated component is exactly ``xml``.
    """
    return [name for name in os.listdir(dir_in) if name.split('.')[-1] == 'xml']


def return_parent_contours(contours, hierarchy):
    """
    Keep only the contours that have no parent.

    ``hierarchy`` is indexed as ``hierarchy[0][i][3]``; a value of ``-1``
    marks contour ``i`` as top level. ``hierarchy`` is not touched when
    ``contours`` is empty.
    """
    return [
        contour
        for position, contour in enumerate(contours)
        if hierarchy[0][position][3] == -1
    ]


def _polygons_within_area(image, contours, max_area, min_area):
    """
    Yield ``(position, polygon)`` for contours whose area is in range.

    The area limits are fractions of the pixel count of ``image``
    (``image.shape[:2]``). Contours with fewer than three points are skipped.
    """
    image_area = np.prod(image.shape[:2])
    for position, contour in enumerate(contours):
        # A 3-d contour is reduced to its first element.
        # NOTE(review): for OpenCV-style (N, 1, 2) contours this leaves a
        # single point, so such contours appear to be always skipped below;
        # only (1, N, 2) input survives. Behaviour kept - confirm intent.
        if len(np.shape(contour)) == 3:
            contour = contour[0]
        if len(contour) < 3:  # A polygon cannot have less than 3 points
            continue
        polygon = geometry.Polygon(list(contour))
        if min_area * image_area <= polygon.area <= max_area * image_area:
            yield position, polygon


def _polygon_to_contour(polygon, dtype):
    """Convert the exterior ring of a shapely polygon to an (N, 1, 2) array."""
    return np.array([[point] for point in polygon.exterior.coords], dtype=dtype)


def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
    """
    Filter contours by area relative to the size of ``image``.

    ``hierarchy`` is accepted for interface compatibility and is not used.
    Returns a list of ``np.int32`` arrays built from the exterior ring of
    every polygon whose area lies within
    ``[min_area, max_area] * image height * image width``.
    """
    return [
        _polygon_to_contour(polygon, np.int32)
        for _, polygon in _polygons_within_area(image, contours, max_area, min_area)
    ]


def filter_contours_area_of_image(image, contours, order_index, max_area, min_area):
    """
    Filter contours by relative area and keep their order indices aligned.

    Returns ``(polygons, order_index_filtered)`` where ``polygons`` are
    ``np.uint`` arrays and ``order_index_filtered[k]`` is the entry of
    ``order_index`` that belonged to the k-th surviving contour.
    """
    found_polygons_early = []
    order_index_filtered = []
    for position, polygon in _polygons_within_area(image, contours, max_area, min_area):
        found_polygons_early.append(_polygon_to_contour(polygon, np.uint))
        order_index_filtered.append(order_index[position])
    return found_polygons_early, order_index_filtered


def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
    """
    Extract the top-level contours of all pixels equal to ``pixel``.

    For a 3-d input only channel 0 is compared. The contours are filtered
    with ``filter_contours_area_of_image_tables`` using ``min_area`` as the
    lower bound and 1 as the upper bound (fractions of the image area).
    """
    if len(region_pre_p.shape) == 3:
        mask = (region_pre_p[:, :, 0] == pixel) * 1
    else:
        mask = (region_pre_p[:, :] == pixel) * 1

    mask = mask.astype(np.uint8)
    mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(imgray, 0, 255, 0)

    contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours_imgs = return_parent_contours(contours_imgs, hierarchy)
    return filter_contours_area_of_image_tables(
        thresh, contours_imgs, hierarchy, max_area=1, min_area=min_area)


def update_region_contours(co_text, img_boundary, erosion_rate, dilation_rate, y_len, x_len):
    """
    Erode region contours and mark the surrounding boundary pixels.

    Each contour is drawn on an empty ``(y_len, x_len)`` canvas, eroded
    ``erosion_rate`` times (skipped when the rate is 0) and then dilated
    ``dilation_rate`` times. Pixels gained by the dilation are set to 1 in
    ``img_boundary`` (modified in place).

    Returns ``(contours, img_boundary)`` where each contour is the first
    contour recovered from the eroded mask, or the original contour when
    none could be recovered.
    """
    co_text_eroded = []
    for con in co_text:
        region_mask = np.zeros((y_len, x_len))
        region_mask = cv2.fillPoly(region_mask, pts=[con], color=(1, 1, 1))

        if erosion_rate > 0:
            region_mask = cv2.erode(region_mask[:, :], KERNEL, iterations=erosion_rate)

        con_eroded = return_contours_of_interested_region(region_mask, 1, 0)
        # Fall back to the untouched contour when nothing was recovered.
        co_text_eroded.append(con_eroded[0] if len(con_eroded) > 0 else con)

        dilated_mask = cv2.dilate(region_mask[:, :], KERNEL, iterations=dilation_rate)
        boundary = dilated_mask - region_mask
        img_boundary[boundary == 1] = 1
    return co_text_eroded, img_boundary


def _tags_ending_with(alltags, suffix):
    """Return the sorted, de-duplicated tags that end with ``suffix``."""
    return sorted({tag for tag in alltags if tag.endswith(suffix)})


def _page_size(root, ns):
    """
    Return ``(imageHeight, imageWidth)`` of the last ``Page`` element.

    Raises ValueError when the document contains no ``Page`` element.
    """
    size = None
    for page in root.iter(ns + 'Page'):
        size = (int(page.attrib['imageHeight']), int(page.attrib['imageWidth']))
    if size is None:
        raise ValueError("no Page element found in PAGE-XML document")
    return size


def _read_region_coords(region, ns, keep=True):
    """
    Read the outline of one PAGE-XML element.

    Two encodings are handled:

    * a ``Coords`` element carrying attributes: its ``points`` attribute
      ("x1,y1 x2,y2 ...") is parsed and the result is a one-element list
      holding an (N, 2) integer array;
    * ``Point`` elements with ``x``/``y`` attributes: the result is a list
      of ``[x, y]`` pairs, collected until the first non-``Point`` element
      that follows at least one collected point.

    With ``keep=False`` the element is walked the same way but nothing is
    collected, so the result is always empty.
    """
    coords_tag = ns + 'Coords'
    point_tag = ns + 'Point'
    collected = []
    n_points = 0
    for node in region.iter():
        if node.tag == coords_tag and node.attrib:
            raw_points = node.attrib['points'].split(' ')
            if keep:
                collected.append(np.array(
                    [[int(p.split(',')[0]), int(p.split(',')[1])] for p in raw_points]))
            break
        if node.tag == point_tag:
            if keep:
                collected.append(
                    [int(float(node.attrib['x'])), int(float(node.attrib['y']))])
                n_points += 1
        elif n_points >= 1:
            break
    return collected


def _collect_regions(root, alltags, suffix, ns):
    """
    Collect one contour array per element whose tag ends with ``suffix``.

    An element without any coordinates contributes an empty array.
    """
    contours = []
    for tag in _tags_ending_with(alltags, suffix):
        for region in root.iter(tag):
            contours.append(np.array(_read_region_coords(region, ns)))
    return contours


def _configured_bucket(region, configured_types, rest_key, default_type):
    """
    Choose the contour bucket for a typed region, or None to drop it.

    Regions without a ``type`` attribute are dropped. When ``rest_key`` is
    configured, every type that is not configured explicitly falls back to
    ``default_type``; otherwise the region's own type is used as is.
    """
    if 'type' not in region.attrib:
        return None
    region_type = region.attrib['type']
    if rest_key not in configured_types:
        return region_type
    explicit = [t for t in configured_types if t != rest_key and t != default_type]
    return region_type if region_type in explicit else default_type


def _collect_typed_regions(root, alltags, suffix, ns, configured_types,
                           rest_key, default_type, buckets):
    """
    Sort the contours of typed regions into ``buckets`` (modified in place).

    Raises KeyError when a region resolves to a type that has no bucket.
    """
    for tag in _tags_ending_with(alltags, suffix):
        for region in root.iter(tag):
            bucket = _configured_bucket(region, configured_types, rest_key, default_type)
            points = _read_region_coords(region, ns, keep=bucket is not None)
            if len(points) > 0:
                buckets[bucket].append(np.array(points))


def _fill_typed_regions(img, contours_by_type, type_labels, rest_key,
                        default_type, output_type):
    """
    Draw all configured region types of one family onto ``img``.

    ``type_labels`` maps configured type names to integer labels. For '3d'
    output the label is translated to an RGB colour, otherwise the label
    itself is passed to ``cv2.fillPoly`` as the colour.
    """
    has_rest = rest_key in type_labels
    # The previous code tried ``names[names == rest_key] = default_type`` on
    # a list, which overwrites element 0 regardless of where ``rest_key``
    # sits. Replace the actual entry instead.
    names = [default_type if name == rest_key else name for name in type_labels]
    for name in names:
        if has_rest and name == default_type:
            label = type_labels[rest_key]
        else:
            label = type_labels[name]
        color = _LAYOUT_RGB_COLORS[label] if output_type == '3d' else label
        img = cv2.fillPoly(img, pts=contours_by_type[name], color=color)
    return img


def _check_output_type(output_type):
    """Raise ValueError unless ``output_type`` is '2d' or '3d'."""
    if output_type not in ('2d', '3d'):
        raise ValueError("unsupported output_type: %r" % (output_type,))


def _render_use_case(root, alltags, ns, config_params, output_type, y_len, x_len):
    """
    Render the label image for the textline/word/glyph/printspace use cases.

    When 'artificial_class_label' is configured the contours are eroded once
    and the boundary produced by three dilations is painted with the
    artificial class.
    """
    _check_output_type(output_type)
    suffix = '}' + _USE_CASE_TAGS[config_params['use_case']]
    contours = _collect_regions(root, alltags, suffix, ns)

    with_boundary = "artificial_class_label" in config_params
    if with_boundary:
        img_boundary = np.zeros((y_len, x_len))
        contours, img_boundary = update_region_contours(
            contours, img_boundary, 1, 3, y_len, x_len)

    img = np.zeros((y_len, x_len, 3))
    if output_type == '2d':
        img = cv2.fillPoly(img, pts=contours, color=(1, 1, 1))
        if with_boundary:
            img[img_boundary == 1] = config_params['artificial_class_label']
    else:
        img = cv2.fillPoly(img, pts=contours, color=_USE_CASE_RGB)
        if with_boundary:
            img[img_boundary == 1] = _ARTIFICIAL_CLASS_RGB
    return img


def _render_layout(root, alltags, ns, config_params, output_type, y_len, x_len):
    """
    Render the label image for the 'layout' use case.

    Only region families present in ``config_params`` are parsed and drawn.
    Drawing order: graphic, image, separator, table, noise, text regions,
    then the artificial boundary class (if configured).
    """
    _check_output_type(output_type)

    with_boundary = "artificial_class_on_boundry" in config_params
    if with_boundary:
        elements_with_artificial_class = list(config_params['artificial_class_on_boundry'])
        artificial_class_label = config_params['artificial_class_label']

    co_text = {name: [] for name in _TEXT_REGION_TYPES}
    co_graphic = {name: [] for name in _GRAPHIC_REGION_TYPES}
    co_simple = {}

    if 'textregions' in config_params:
        _collect_typed_regions(
            root, alltags, '}TextRegion', ns,
            list(config_params['textregions'].keys()),
            'rest_as_paragraph', 'paragraph', co_text)
    if 'graphicregions' in config_params:
        _collect_typed_regions(
            root, alltags, '}GraphicRegion', ns,
            list(config_params['graphicregions'].keys()),
            'rest_as_decoration', 'decoration', co_graphic)
    for key, suffix in _SIMPLE_LAYOUT_REGIONS:
        if key in config_params:
            co_simple[key] = _collect_regions(root, alltags, suffix, ns)

    if with_boundary:
        img_boundary = np.zeros((y_len, x_len))
        for name, erosion_rate, dilation_rate in _BOUNDARY_RATES:
            if name in elements_with_artificial_class:
                co_text[name], img_boundary = update_region_contours(
                    co_text[name], img_boundary, erosion_rate, dilation_rate,
                    y_len, x_len)

    img = np.zeros((y_len, x_len, 3))
    if 'graphicregions' in config_params:
        img = _fill_typed_regions(
            img, co_graphic, config_params['graphicregions'],
            'rest_as_decoration', 'decoration', output_type)
    for key, _ in _SIMPLE_LAYOUT_REGIONS:
        if key in config_params:
            label = config_params[key]
            if output_type == '3d':
                color = _LAYOUT_RGB_COLORS[label]
            else:
                color = (label, label, label)
            img = cv2.fillPoly(img, pts=co_simple[key], color=color)
    if 'textregions' in config_params:
        img = _fill_typed_regions(
            img, co_text, config_params['textregions'],
            'rest_as_paragraph', 'paragraph', output_type)

    if with_boundary:
        if output_type == '3d':
            img[img_boundary == 1] = _ARTIFICIAL_CLASS_RGB
        else:
            img[img_boundary == 1] = artificial_class_label
    return img


def _png_path(output_dir, xml_name):
    """
    Build the output path for the label image of ``xml_name``.

    The stem is the text between the first '-' and the following '.'; when
    the name contains no '-', the text before the first '.' is used.
    """
    try:
        stem = xml_name.split('-')[1].split('.')[0]
    except IndexError:
        stem = xml_name.split('.')[0]
    return output_dir + '/' + stem + '.png'


def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params):
    """
    Read the PAGE-XML files and write the ground truth images into the given
    output directory.

    Parameters:
        gt_list: file names (relative to ``dir_in``) of the XML files.
        dir_in: directory containing the XML files.
        output_dir: directory the PNG label images are written to.
        output_type: '2d' (class labels) or '3d' (RGB colours).
        config_file: when falsy, files are parsed but nothing is written.
        config_params: configuration mapping; ``config_params['use_case']``
            selects 'textline', 'word', 'glyph', 'printspace' or 'layout'.
            Any other use case writes nothing.

    Raises:
        ValueError: if a document has no ``Page`` element or ``output_type``
            is neither '2d' nor '3d' for a supported use case.
    """
    ## to do: add footnote to text regions
    for xml_name in tqdm(gt_list):
        root1 = ET.parse(dir_in + '/' + xml_name).getroot()
        alltags = [elem.tag for elem in root1.iter()]
        link = alltags[0].split('}')[0] + '}'
        y_len, x_len = _page_size(root1, link)

        if not config_file:
            continue

        use_case = config_params['use_case']
        if use_case in _USE_CASE_TAGS:
            img_poly = _render_use_case(
                root1, alltags, link, config_params, output_type, y_len, x_len)
        elif use_case == 'layout':
            img_poly = _render_layout(
                root1, alltags, link, config_params, output_type, y_len, x_len)
        else:
            continue

        cv2.imwrite(_png_path(output_dir, xml_name), img_poly)


def _contour_extents(point_sets):
    """
    Compute per-contour extents for a sequence of 2-d point arrays.

    Returns ``(x_min, x_max, y_min, y_max, y_at_x_min)`` as numpy arrays,
    where ``y_at_x_min`` is the y value of the first point with minimal x.
    """
    x_min = np.array([np.min(pts[:, 0]) for pts in point_sets])
    leftmost = [np.argmin(pts[:, 0]) for pts in point_sets]
    y_at_x_min = np.array([pts[idx, 1] for pts, idx in zip(point_sets, leftmost)])
    x_max = np.array([np.max(pts[:, 0]) for pts in point_sets])
    y_min = np.array([np.min(pts[:, 1]) for pts in point_sets])
    y_max = np.array([np.max(pts[:, 1]) for pts in point_sets])
    return x_min, x_max, y_min, y_max, y_at_x_min


def find_new_features_of_contours(contours_main):
    """
    Compute centroids and extents for each contour.

    Returns ``(cx, cy, x_min, x_max, y_min, y_max, y_at_x_min)``: ``cx`` and
    ``cy`` are lists of centroid coordinates derived from ``cv2.moments``;
    the remaining items are numpy arrays with one entry per contour.

    The extents are first computed on ``contour[0]`` of every contour; if
    that indexing fails for any of them, all contours are processed again
    without the leading ``[0]``.
    """
    moments = [cv2.moments(contour) for contour in contours_main]
    # The tiny constant avoids a division by zero for degenerate contours.
    cx_main = [m["m10"] / (m["m00"] + 1e-32) for m in moments]
    cy_main = [m["m01"] / (m["m00"] + 1e-32) for m in moments]

    try:
        extents = _contour_extents([contour[0] for contour in contours_main])
    except (IndexError, TypeError, ValueError):
        extents = _contour_extents(list(contours_main))
    x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = extents

    return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin


def _printspace_bounding_box(root, alltags, ns, y_len, x_len):
    """
    Return ``[x, y, w, h]`` of the largest PrintSpace (or Border) contour.

    ``PrintSpace`` takes precedence over ``Border``. Returns None when the
    document has neither element or when no contour can be recovered from
    the drawn mask.
    """
    if ns + 'PrintSpace' in alltags:
        suffix = '}PrintSpace'
    elif ns + 'Border' in alltags:
        suffix = '}Border'
    else:
        return None

    co_printspace = _collect_regions(root, alltags, suffix, ns)
    mask = np.zeros((y_len, x_len, 3))
    mask = cv2.fillPoly(mask, pts=co_printspace, color=(1, 1, 1))
    mask = mask.astype(np.uint8)

    imgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(imgray, 0, 255, 0)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        return None

    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    return [x, y, w, h]


def read_xml(xml_file):
    """
    Parse one PAGE-XML file.

    Returns a tuple of:
        tree, root: the parsed ElementTree and its root element;
        bb_coord_printspace: ``[x, y, w, h]`` of the print space, or None;
        file_name: stem of ``xml_file``;
        id_paragraph: ids of the regions treated as paragraphs;
        id_header + id_heading: ids of header regions, then heading regions;
        co_text_paragraph: contours of the paragraph regions;
        co_text_header + co_text_heading: header contours, then headings;
        tot_region_ref: ``regionRef`` of every ``RegionRefIndexed`` element;
        x_len, y_len: page width and height;
        index_tot_regions: ``index`` of every ``RegionRefIndexed`` element;
        img_poly: (y_len, x_len, 3) label image with paragraphs = 1,
            headings/headers = 2, marginalia = 3, images = 4, separators = 5.

    Text regions of type 'drop-capital' and 'signature-mark' are ignored;
    every other type besides 'heading', 'header' and 'marginalia' (including
    a missing type) is treated as a paragraph.

    Raises ValueError when the document has no ``Page`` element.
    """
    file_name = Path(xml_file).stem
    tree1 = ET.parse(xml_file)
    root1 = tree1.getroot()
    alltags = [elem.tag for elem in root1.iter()]
    link = alltags[0].split('}')[0] + '}'

    y_len, x_len = _page_size(root1, link)

    index_tot_regions = []
    tot_region_ref = []
    for ref in root1.iter(link + 'RegionRefIndexed'):
        index_tot_regions.append(ref.attrib['index'])
        tot_region_ref.append(ref.attrib['regionRef'])

    bb_coord_printspace = _printspace_bounding_box(root1, alltags, link, y_len, x_len)

    co_text = {'paragraph': [], 'heading': [], 'header': [], 'marginalia': []}
    region_ids = {'paragraph': [], 'heading': [], 'header': []}
    for tag in _tags_ending_with(alltags, '}TextRegion'):
        for region in root1.iter(tag):
            region_type = region.attrib.get('type')
            # These two types never contribute to any returned value.
            if region_type in ('drop-capital', 'signature-mark'):
                continue
            if region_type in ('heading', 'header', 'marginalia'):
                bucket = region_type
            else:
                bucket = 'paragraph'
            points = _read_region_coords(region, link)
            if len(points) > 0:
                co_text[bucket].append(np.array(points))
                if bucket in region_ids:
                    region_ids[bucket].append(region.attrib['id'])

    co_img = _collect_regions(root1, alltags, '}ImageRegion', link)
    co_sep = _collect_regions(root1, alltags, '}SeparatorRegion', link)

    img_poly = np.zeros((y_len, x_len, 3))
    img_poly = cv2.fillPoly(img_poly, pts=co_text['paragraph'], color=(1, 1, 1))
    img_poly = cv2.fillPoly(img_poly, pts=co_text['heading'], color=(2, 2, 2))
    img_poly = cv2.fillPoly(img_poly, pts=co_text['header'], color=(2, 2, 2))
    img_poly = cv2.fillPoly(img_poly, pts=co_text['marginalia'], color=(3, 3, 3))
    img_poly = cv2.fillPoly(img_poly, pts=co_img, color=(4, 4, 4))
    img_poly = cv2.fillPoly(img_poly, pts=co_sep, color=(5, 5, 5))

    return (tree1, root1, bb_coord_printspace, file_name,
            region_ids['paragraph'],
            region_ids['header'] + region_ids['heading'],
            co_text['paragraph'],
            co_text['header'] + co_text['heading'],
            tot_region_ref, x_len, y_len, index_tot_regions, img_poly)


def bounding_box(cnt, color, corr_order_index):
    """
    Return ``[x, y, w, h, color, order]`` for the bounding box of ``cnt``.

    The box is scaled by ``scale_w`` / ``scale_h`` and ``order`` is
    ``corr_order_index + 1``.

    NOTE(review): ``scale_w`` and ``scale_h`` are read as module-level names
    but are not defined in the part of the file visible here - confirm they
    are provided elsewhere before calling this function.
    """
    x, y, w, h = cv2.boundingRect(cnt)
    x = int(x * scale_w)
    y = int(y * scale_h)

    w = int(w * scale_w)
    h = int(h * scale_h)

    return [x, y, w, h, int(color), int(corr_order_index) + 1]


def resize_image(seg_in, input_height, input_width):
    """Resize ``seg_in`` to (input_height, input_width) with nearest-neighbour interpolation."""
    return cv2.resize(seg_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)


def make_image_from_bb(width_l, height_l, bb_all):
    """
    Draw bounding boxes as filled rectangles on an empty canvas.

    ``bb_all`` holds one ``[x, y, w, h, ...]`` row per box; only the first
    four entries are used. Returns a ``(height_l, width_l)`` array that is 1
    inside any box and 0 elsewhere.
    """
    boxes = np.array(bb_all)
    img_remade = np.zeros((height_l, width_l))
    for box in boxes:
        x, y, w, h = box[0], box[1], box[2], box[3]
        img_remade[y:y + h, x:x + w] = 1
    return img_remade


def update_list_and_return_first_with_length_bigger_than_one(index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list, list_inp):
    """
    Split one group of ``list_inp`` around a chosen element.

    The group at ``index_element_to_be_updated`` is replaced, in order, by
    ``pr_list`` (if non-empty), ``[innner_index_pr_pos]`` and ``pos_list``
    (if non-empty). ``list_inp`` is modified in place.

    Returns ``(list_inp, index)`` where ``index`` is the position of the
    first group that still has more than one element, or -20 if none does.
    """
    list_inp.pop(index_element_to_be_updated)
    if len(pr_list) > 0:
        list_inp.insert(index_element_to_be_updated, pr_list)
    else:
        # Nothing precedes the chosen element, so it takes the freed slot.
        index_element_to_be_updated = index_element_to_be_updated - 1

    list_inp.insert(index_element_to_be_updated + 1, [innner_index_pr_pos])
    if len(pos_list) > 0:
        list_inp.insert(index_element_to_be_updated + 2, pos_list)

    group_sizes = np.array([len(group) for group in list_inp])
    list_len_bigger_1 = np.where(group_sizes > 1)[0]
    if len(list_len_bigger_1) > 0:
        early_list_bigger_than_one = list_len_bigger_1[0]
    else:
        early_list_bigger_than_one = -20
    return list_inp, early_list_bigger_than_one