visualizing textline detection from eynollah page-xml output

pull/23/head
vahidrezanezhad 2 weeks ago
parent d554d26739
commit fc75770b73

@ -2,6 +2,7 @@ import click
import json import json
from gt_gen_utils import * from gt_gen_utils import *
from tqdm import tqdm from tqdm import tqdm
from pathlib import Path
@click.group() @click.group()
def main(): def main():
@ -331,6 +332,53 @@ def visualize_reading_order(dir_xml, dir_out, dir_imgs):
cv2.imwrite(os.path.join(dir_out, f_name+'.png'), img) cv2.imwrite(os.path.join(dir_out, f_name+'.png'), img)
@main.command()
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out",
    "-do",
    help="directory where plots will be written",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_imgs",
    "-dimg",
    help="directory of images where textline segmentation will be overlayed", )
def visualize_textline_segmentation(dir_xml, dir_out, dir_imgs):
    """
    Overlay the textlines of every page-xml file in ``dir_xml`` on its image.

    For each entry of ``dir_xml`` the image with the same stem is looked up in
    ``dir_imgs`` (via ``find_format_of_given_filename_in_dir``), the textline
    polygons are rasterised into a per-pixel coverage count, and the result of
    ``visualize_model_output(..., task="textline")`` is written to
    ``dir_out/<stem>.png``.

    Raises:
        click.ClickException: if the image belonging to an xml file cannot be
            read by ``cv2.imread``.
    """
    for xml_name in tqdm(os.listdir(dir_xml)):
        xml_file = os.path.join(dir_xml, xml_name)
        f_name = Path(xml_name).stem

        img_file_name_with_format = find_format_of_given_filename_in_dir(dir_imgs, f_name)
        img_path = os.path.join(dir_imgs, img_file_name_with_format)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail here with a
            # readable message instead of deep inside the overlay helper.
            raise click.ClickException(
                "could not read image '{}' for '{}'".format(img_path, xml_file)
            )

        co_textlines, y_len, x_len = get_textline_contours_for_visualization(xml_file)

        # img_total counts, per pixel, how many textline polygons cover it.
        img_total = np.zeros((y_len, x_len, 3))
        for cont in co_textlines:
            img_in = np.zeros((y_len, x_len, 3))
            img_in = cv2.fillPoly(img_in, pts=[cont], color=(1, 1, 1))
            img_total += img_in

        # Only channel 0 is clipped: every coverage count above 2 is mapped
        # to 2 before the array is passed on as the "prediction".
        coverage = img_total[:, :, 0]
        coverage[coverage > 2] = 2

        img_out, _ = visualize_model_output(img_total, img, task="textline")
        cv2.imwrite(os.path.join(dir_out, f_name + '.png'), img_out)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

@ -16,6 +16,52 @@ KERNEL = np.ones((5, 5), np.uint8)
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
def visualize_model_output(prediction, img, task):
    """
    Convert a model prediction into an image for visual inspection.

    * ``task == "binarization"``: the prediction is inverted
      (``prediction * -1 + 1``) and multiplied by 255. ``img`` is not used and
      no layout image is produced.
    * any other task: every class id found in channel 0 of ``prediction`` is
      painted with a fixed RGB colour, ``img`` is passed through
      ``resize_image`` with the prediction's height and width, and both are
      blended with ``cv2.addWeighted`` (weight 0.5 for the image, 0.1 for the
      colour map).

    Returns:
        A tuple ``(added_image, layout_only)``; ``layout_only`` is ``None``
        for binarization.

    Raises:
        KeyError: if a class id outside 0..15 occurs in the prediction.
    """
    if task == "binarization":
        inverted = prediction * -1 + 1
        return inverted * 255, None

    # Colour table, keyed by the class id rendered as a string.
    rgb_colors = {
        '0': [255, 255, 255],
        '1': [255, 0, 0],
        '2': [255, 125, 0],
        '3': [255, 0, 125],
        '4': [125, 125, 125],
        '5': [125, 125, 0],
        '6': [0, 125, 255],
        '7': [0, 125, 0],
        '8': [125, 125, 125],
        '9': [0, 125, 255],
        '10': [125, 0, 125],
        '11': [0, 255, 0],
        '12': [0, 0, 255],
        '13': [0, 255, 255],
        '14': [255, 125, 125],
        '15': [255, 0, 255],
    }

    class_map = prediction[:, :, 0]
    layout_only = np.zeros(prediction.shape)
    for class_id in np.unique(class_map):
        colour = rgb_colors[str(int(class_id))]
        selected = class_map == class_id
        for channel in range(3):
            layout_only[:, :, channel][selected] = colour[channel]

    height, width = layout_only.shape[0], layout_only.shape[1]
    img = resize_image(img, height, width)

    # Both inputs of addWeighted are brought to the same integer dtype.
    layout_only = layout_only.astype(np.int32)
    img = img.astype(np.int32)

    added_image = cv2.addWeighted(img, 0.5, layout_only, 0.1, 0)
    return added_image, layout_only
def get_content_of_dir(dir_in): def get_content_of_dir(dir_in):
""" """
Listing all ground truth page xml files. All files are needed to have xml format. Listing all ground truth page xml files. All files are needed to have xml format.
@ -138,6 +184,48 @@ def update_region_contours(co_text, img_boundary, erosion_rate, dilation_rate, y
img_boundary[:,:][boundary[:,:]==1] =1 img_boundary[:,:][boundary[:,:]==1] =1
return co_text_eroded, img_boundary return co_text_eroded, img_boundary
def get_textline_contours_for_visualization(xml_file):
    """
    Read the textline polygons and the page size from a PAGE-XML file.

    Each ``TextLine`` element is scanned in document order. The first
    ``Coords`` element carrying a non-empty ``points`` attribute
    (``"x1,y1 x2,y2 ..."``) defines the contour; otherwise consecutive
    ``Point`` elements (``x``/``y`` attributes) are collected. Coordinates may
    be written as floats and are truncated to integers.

    Args:
        xml_file: file name or file object accepted by ``ET.parse``.

    Returns:
        A tuple ``(contours, y_len, x_len)``. ``contours`` is a list of numpy
        arrays: shape ``(1, N, 2)`` for contours read from a ``points``
        attribute and ``(N, 2)`` for contours read from ``Point`` elements.
        Textlines without any coordinates are skipped. ``y_len`` / ``x_len``
        are the ``imageHeight`` / ``imageWidth`` attributes of the ``Page``
        element (the last one, if several exist).

    Raises:
        ValueError: if the document contains no ``Page`` element.
    """
    tree1 = ET.parse(xml_file, parser=ET.XMLParser(encoding='iso-8859-5'))
    root1 = tree1.getroot()
    alltags = [elem.tag for elem in root1.iter()]
    # "{namespace-uri}" prefix, taken from the root element's tag.
    link = alltags[0].split('}')[0] + '}'

    y_len = x_len = None
    for page in root1.iter(link + 'Page'):
        y_len = int(page.attrib['imageHeight'])
        x_len = int(page.attrib['imageWidth'])
    if y_len is None or x_len is None:
        raise ValueError("no Page element found in {}".format(xml_file))

    coords_tag = link + 'Coords'
    point_tag = link + 'Point'
    # Distinct, sorted tag names that denote textlines.
    textline_tags = sorted({tag for tag in alltags if tag.endswith('}TextLine')})

    co_use_case = []
    for tag in textline_tags:
        for textline in root1.iter(tag):
            contour = []
            n_points = 0
            for node in textline.iter():
                if node.tag == coords_tag:
                    # split() without argument tolerates repeated or trailing
                    # whitespace between the coordinate pairs.
                    pairs = (node.attrib.get('points') or '').split()
                    if pairs:
                        polygon = []
                        for pair in pairs:
                            xy = pair.split(',')
                            polygon.append([int(float(xy[0])), int(float(xy[1]))])
                        contour.append(np.array(polygon))
                        # Stop here so that Coords of nested elements are ignored.
                        break

                if node.tag == point_tag:
                    contour.append([int(float(node.attrib['x'])), int(float(node.attrib['y']))])
                    n_points += 1
                elif n_points >= 1:
                    # The run of Point elements has ended.
                    break

            # An empty contour cannot be drawn, so it is not reported.
            if contour:
                co_use_case.append(np.array(contour))
    return co_use_case, y_len, x_len
def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images): def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images):
""" """
Reading the page xml files and write the ground truth images into given output directory. Reading the page xml files and write the ground truth images into given output directory.

Loading…
Cancel
Save