From fc75770b73f7c50efe1b372450b5a5174771d4a8 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 12 May 2025 18:31:40 +0200 Subject: [PATCH] visulizing textline detection from eynollah page-xml output --- generate_gt_for_training.py | 48 ++++++++++++++++++++ gt_gen_utils.py | 88 +++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) diff --git a/generate_gt_for_training.py b/generate_gt_for_training.py index 77e9238..9ce743a 100644 --- a/generate_gt_for_training.py +++ b/generate_gt_for_training.py @@ -2,6 +2,7 @@ import click import json from gt_gen_utils import * from tqdm import tqdm +from pathlib import Path @click.group() def main(): @@ -331,6 +332,53 @@ def visualize_reading_order(dir_xml, dir_out, dir_imgs): cv2.imwrite(os.path.join(dir_out, f_name+'.png'), img) +@main.command() +@click.option( + "--dir_xml", + "-dx", + help="directory of GT page-xml files", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--dir_out", + "-do", + help="directory where plots will be written", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--dir_imgs", + "-dimg", + help="directory of images where textline segmentation will be overlayed", ) + +def visualize_textline_segmentation(dir_xml, dir_out, dir_imgs): + xml_files_ind = os.listdir(dir_xml) + for ind_xml in tqdm(xml_files_ind): + indexer = 0 + #print(ind_xml) + #print('########################') + xml_file = os.path.join(dir_xml,ind_xml ) + f_name = Path(ind_xml).stem + + img_file_name_with_format = find_format_of_given_filename_in_dir(dir_imgs, f_name) + img = cv2.imread(os.path.join(dir_imgs, img_file_name_with_format)) + + co_tetxlines, y_len, x_len = get_textline_contours_for_visualization(xml_file) + + img_total = np.zeros((y_len, x_len, 3)) + for cont in co_tetxlines: + img_in = np.zeros((y_len, x_len, 3)) + img_in = cv2.fillPoly(img_in, pts =[cont], color=(1,1,1)) + + img_total = img_total + img_in + + img_total[:,:, 0][img_total[:,:, 0]>2] = 2 + + img_out, _ = visualize_model_output(img_total, img, task="textline") + + cv2.imwrite(os.path.join(dir_out, f_name+'.png'), img_out) + if __name__ == "__main__": main() diff --git a/gt_gen_utils.py b/gt_gen_utils.py index 10183d6..0a65f05 100644 --- a/gt_gen_utils.py +++ b/gt_gen_utils.py @@ -16,6 +16,52 @@ KERNEL = np.ones((5, 5), np.uint8) with warnings.catch_warnings(): warnings.simplefilter("ignore") +def visualize_model_output(prediction, img, task): + if task == "binarization": + prediction = prediction * -1 + prediction = prediction + 1 + added_image = prediction * 255 + layout_only = None + else: + unique_classes = np.unique(prediction[:,:,0]) + rgb_colors = {'0' : [255, 255, 255], + '1' : [255, 0, 0], + '2' : [255, 125, 0], + '3' : [255, 0, 125], + '4' : [125, 125, 125], + '5' : [125, 125, 0], + '6' : [0, 125, 255], + '7' : [0, 125, 0], + '8' : [125, 125, 125], + '9' : [0, 125, 255], + '10' : [125, 0, 125], + '11' : [0, 255, 0], + '12' : [0, 0, 255], + '13' : [0, 255, 255], + '14' : [255, 125, 125], + '15' : [255, 0, 255]} + + layout_only = np.zeros(prediction.shape) + + for unq_class in unique_classes: + rgb_class_unique = rgb_colors[str(int(unq_class))] + layout_only[:,:,0][prediction[:,:,0]==unq_class] = rgb_class_unique[0] + layout_only[:,:,1][prediction[:,:,0]==unq_class] = rgb_class_unique[1] + layout_only[:,:,2][prediction[:,:,0]==unq_class] = rgb_class_unique[2] + + + + img = resize_image(img, layout_only.shape[0], layout_only.shape[1]) + + layout_only = layout_only.astype(np.int32) + img = img.astype(np.int32) + + + + added_image = cv2.addWeighted(img,0.5,layout_only,0.1,0) + + return added_image, layout_only + def get_content_of_dir(dir_in): """ Listing all ground truth page xml files. All files are needed to have xml format. @@ -138,6 +184,48 @@ def update_region_contours(co_text, img_boundary, erosion_rate, dilation_rate, y img_boundary[:,:][boundary[:,:]==1] =1 return co_text_eroded, img_boundary + +def get_textline_contours_for_visualization(xml_file): + tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding = 'iso-8859-5')) + root1=tree1.getroot() + alltags=[elem.tag for elem in root1.iter()] + link=alltags[0].split('}')[0]+'}' + + + + for jj in root1.iter(link+'Page'): + y_len=int(jj.attrib['imageHeight']) + x_len=int(jj.attrib['imageWidth']) + + region_tags = np.unique([x for x in alltags if x.endswith('TextLine')]) + tag_endings = ['}TextLine','}textline'] + co_use_case = [] + + for tag in region_tags: + if tag.endswith(tag_endings[0]) or tag.endswith(tag_endings[1]): + for nn in root1.iter(tag): + c_t_in = [] + sumi = 0 + for vv in nn.iter(): + if vv.tag == link + 'Coords': + coords = bool(vv.attrib) + if coords: + p_h = vv.attrib['points'].split(' ') + c_t_in.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) + break + else: + pass + + if vv.tag == link + 'Point': + c_t_in.append([int(float(vv.attrib['x'])), int(float(vv.attrib['y']))]) + sumi += 1 + elif vv.tag != link + 'Point' and sumi >= 1: + break + co_use_case.append(np.array(c_t_in)) + return co_use_case, y_len, x_len + + def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images): """ Reading the page xml files and write the ground truth images into given output directory.