# sbb_pixelwise_segmentation/generate_gt_for_training.py

import click
import json
from gt_gen_utils import *
from tqdm import tqdm

# Root click command group. The generators defined in this module register
# themselves on it through ``@main.command()``; the group itself does no work.
@click.group()
def main():
    return None

@main.command()
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out",
    "-do",
    help="directory where ground truth images would be written",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--config",
    "-cfg",
    help="config file of prefered layout or use case.",
    type=click.Path(exists=True, dir_okay=False),
)
@click.option(
    "--type_output",
    "-to",
    help="this defines how output should be. A 2d image array or a 3d image array encoded with RGB color. Just pass 2d or 3d. The file will be saved one directory up. 2D image array is 3d but only information of one channel would be enough since all channels have the same values.",
)
def pagexml2label(dir_xml, dir_out, type_output, config):
    # Sub-command: hand the PAGE-XML files found in ``dir_xml`` to
    # ``get_images_of_ground_truth`` together with the optional JSON config.
    #
    # ``config_params`` stays None when no config file was given; in that case
    # the original diagnostic message "passed" is printed.
    config_params = None
    if config:
        with open(config) as cfg_file:
            config_params = json.load(cfg_file)
    else:
        print("passed")

    page_xml_files = get_content_of_dir(dir_xml)
    get_images_of_ground_truth(
        page_xml_files,
        dir_xml,
        dir_out,
        type_output,
        config,
        config_params,
    )
    
@main.command()
@click.option(
    "--dir_imgs",
    "-dis",
    help="directory of images with high resolution.",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out_images",
    "-dois",
    help="directory where degraded images will be written.",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out_labels",
    "-dols",
    help="directory where original images will be written as labels.",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--scales",
    "-scs",
    help="json dictionary where the scales are written.",
    type=click.Path(exists=True, dir_okay=False),
)
def image_enhancement(dir_imgs, dir_out_images, dir_out_labels, scales):
    """Create degraded/original image pairs for image-enhancement training.

    Every readable image in --dir_imgs is resized by each factor listed under
    the "scales" key of the --scales JSON file and then resized back to its
    original size. The result is written to --dir_out_images and the untouched
    original to --dir_out_labels under the same file name
    (<name>_<scale index><extension>).
    """
    ls_imgs = os.listdir(dir_imgs)
    with open(scales) as f:
        ls_scales = json.load(f)['scales']

    for img in tqdm(ls_imgs):
        # splitext keeps everything before the LAST dot as the name, so
        # "page.001.png" becomes ("page.001", ".png") instead of being cut at
        # the first dot, and names without any dot no longer raise IndexError.
        img_name, img_ext = os.path.splitext(img)

        image = cv2.imread(os.path.join(dir_imgs, img))
        if image is None:
            # cv2.imread returns None for entries it cannot decode
            # (sub-directories, hidden files, non-image files); skip them
            # instead of failing on ``image.shape``.
            print("skipping " + img + ": could not be read as an image")
            continue

        org_height, org_width = image.shape[:2]
        for i, scale in enumerate(ls_scales):
            # Clamp to one pixel so a very small factor cannot request a
            # zero-sized intermediate image.
            height_sc = max(1, int(org_height * scale))
            width_sc = max(1, int(org_width * scale))

            image_down_scaled = resize_image(image, height_sc, width_sc)
            image_back_to_org_scale = resize_image(image_down_scaled, org_height, org_width)

            # The label is written once per scale on purpose: each degraded
            # image gets an identically named original as its target.
            out_name = img_name + '_' + str(i) + img_ext
            cv2.imwrite(os.path.join(dir_out_images, out_name), image_back_to_org_scale)
            cv2.imwrite(os.path.join(dir_out_labels, out_name), image)
    
    
@main.command()
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out_modal_image",
    "-domi",
    help="directory where ground truth images would be written",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out_classes",
    "-docl",
    help="directory where ground truth classes would be written",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--input_height",
    "-ih",
    help="input height",
    type=int,
    required=True,
)
@click.option(
    "--input_width",
    "-iw",
    help="input width",
    type=int,
    required=True,
)
@click.option(
    "--min_area_size",
    "-min",
    help="min area size of regions considered for reading order training.",
    type=float,
    required=True,
)
def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size):
    """Generate pairwise reading-order training samples from PAGE-XML files.

    For every ordered pair (i, j) of distinct text regions of a page, a
    3-channel PNG (mask of region i, page layout, mask of region j; all resized
    to --input_height x --input_width) is written to --dir_out_modal_image and
    a .npy file holding 1 if region i has the lower reading-order index, else
    0, is written to --dir_out_classes.
    """
    # The conversions are kept so the callback also accepts string values.
    input_height = int(input_height)
    input_width = int(input_width)
    min_area = float(min_area_size)

    indexer_start = 0  # offset added to every per-file sample index
    max_area = 1

    for ind_xml in tqdm(os.listdir(dir_xml)):
        xml_file = os.path.join(dir_xml, ind_xml)
        # Strip only the final extension: cutting at the first dot made files
        # such as "a.1.xml" and "a.2.xml" overwrite each other's samples.
        f_name = os.path.splitext(ind_xml)[0]

        (_, id_paragraph, id_header, co_text_paragraph, co_text_header,
         tot_region_ref, x_len, y_len, index_tot_regions, img_poly) = read_xml(xml_file)

        id_all_text = id_paragraph + id_header
        co_text_all = co_text_paragraph + co_text_header

        _, cy_main, x_min_main, x_max_main, _, y_max_main, _ = find_new_features_of_contours(co_text_header)

        # Mark a 12-pixel-high band directly below every header contour.
        img_header_and_sep = np.zeros((y_len, x_len), dtype='uint8')
        for j in range(len(cy_main)):
            band_top = int(y_max_main[j])
            img_header_and_sep[band_top:band_top + 12, int(x_min_main[j]):int(x_max_main[j])] = 1

        # Reading-order index of every text region, looked up by region id.
        texts_corr_order_index_int = [
            int(index_tot_regions[tot_region_ref.index(region_id)])
            for region_id in id_all_text
        ]

        # NOTE(review): the two returned lists are assumed to stay parallel
        # (one contour per order index) - confirm in filter_contours_area_of_image.
        co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(
            img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area)

        n_regions = len(texts_corr_order_index_int)
        if n_regions < 2:
            # No pair can be formed, so nothing would be written for this page.
            continue

        layout_channel = img_poly[:, :, 0]
        # NOTE(review): label 5 in img_poly is presumably the separator class -
        # confirm against read_xml.
        layout_is_5 = layout_channel == 5
        header_band = img_header_and_sep == 1

        # Resize every region mask exactly once. The previous code kept a
        # full-resolution (H, W, n) stack and re-resized two layers plus the
        # layout channel for each of the n*(n-1) pairs.
        region_layers = []
        for contour in co_text_all:
            img_label = np.zeros((y_len, x_len, 3), dtype='uint8')
            img_label = cv2.fillPoly(img_label, pts=[contour], color=(1, 1, 1))

            region_channel = img_label[:, :, 0]
            region_channel[layout_is_5] = 2
            region_channel[header_band] = 3

            region_layers.append(resize_image(region_channel, input_height, input_width))

        layout_resized = resize_image(layout_channel, input_height, input_width)

        indexer = 0
        for i in range(n_regions):
            for j in range(n_regions):
                if i == j:
                    continue

                # NOTE(review): int8 is kept from the original implementation;
                # label values above 127 would not survive it.
                input_matrix = np.zeros((input_height, input_width, 3)).astype(np.int8)
                input_matrix[:, :, 0] = region_layers[i]
                input_matrix[:, :, 1] = layout_resized
                input_matrix[:, :, 2] = region_layers[j]

                # Class 1: region i comes before region j in reading order.
                if texts_corr_order_index_int[i] < texts_corr_order_index_int[j]:
                    class_type = 1
                else:
                    class_type = 0

                final_f_name = f_name + '_' + str(indexer + indexer_start)
                np.save(os.path.join(dir_out_classes, final_f_name + '.npy'), class_type)
                cv2.imwrite(os.path.join(dir_out_modal_image, final_f_name + '.png'), input_matrix)
                indexer = indexer + 1

    
# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    main()
machine based reading order training dataset generator is added 2024-05-24 14:42:58 +02:00			`import click`
			`import json`
			`from gt_gen_utils import *`
			`from tqdm import tqdm`

			`@click.group()`
			`def main():`
			`pass`

			`@main.command()`
			`@click.option(`
			`"--dir_xml",`
			`"-dx",`
			`help="directory of GT page-xml files",`
			`type=click.Path(exists=True, file_okay=False),`
			`)`
			`@click.option(`
			`"--dir_out",`
			`"-do",`
			`help="directory where ground truth images would be written",`
			`type=click.Path(exists=True, file_okay=False),`
			`)`

			`@click.option(`
			`"--config",`
			`"-cfg",`
			`help="config file of prefered layout or use case.",`
			`type=click.Path(exists=True, dir_okay=False),`
			`)`

			`@click.option(`
			`"--type_output",`
			`"-to",`
			`help="this defines how output should be. A 2d image array or a 3d image array encoded with RGB color. Just pass 2d or 3d. The file will be saved one directory up. 2D image array is 3d but only information of one channel would be enough since all channels have the same values.",`
			`)`

			`def pagexml2label(dir_xml,dir_out,type_output,config):`
			`if config:`
			`with open(config) as f:`
			`config_params = json.load(f)`
			`else:`
			`print("passed")`
			`config_params = None`
			`gt_list = get_content_of_dir(dir_xml)`
			`get_images_of_ground_truth(gt_list,dir_xml,dir_out,type_output, config, config_params)`

			`@main.command()`
			`@click.option(`
			`"--dir_imgs",`
			`"-dis",`
			`help="directory of images with high resolution.",`
			`type=click.Path(exists=True, file_okay=False),`
			`)`
			`@click.option(`
			`"--dir_out_images",`
			`"-dois",`
			`help="directory where degraded images will be written.",`
			`type=click.Path(exists=True, file_okay=False),`
			`)`

			`@click.option(`
			`"--dir_out_labels",`
			`"-dols",`
			`help="directory where original images will be written as labels.",`
			`type=click.Path(exists=True, file_okay=False),`
			`)`
pass degrading scales for image enhancement as a json file 2024-05-28 10:01:17 +02:00			`@click.option(`
			`"--scales",`
			`"-scs",`
			`help="json dictionary where the scales are written.",`
			`type=click.Path(exists=True, dir_okay=False),`
			`)`
			`def image_enhancement(dir_imgs, dir_out_images, dir_out_labels, scales):`
machine based reading order training dataset generator is added 2024-05-24 14:42:58 +02:00			`ls_imgs = os.listdir(dir_imgs)`
pass degrading scales for image enhancement as a json file 2024-05-28 10:01:17 +02:00			`with open(scales) as f:`
			`scale_dict = json.load(f)`
			`ls_scales = scale_dict['scales']`
machine based reading order training dataset generator is added 2024-05-24 14:42:58 +02:00
			`for img in tqdm(ls_imgs):`
			`img_name = img.split('.')[0]`
			`img_type = img.split('.')[1]`
			`image = cv2.imread(os.path.join(dir_imgs, img))`
			`for i, scale in enumerate(ls_scales):`
			`height_sc = int(image.shape[0]*scale)`
			`width_sc = int(image.shape[1]*scale)`

			`image_down_scaled = resize_image(image, height_sc, width_sc)`
			`image_back_to_org_scale = resize_image(image_down_scaled, image.shape[0], image.shape[1])`

			`cv2.imwrite(os.path.join(dir_out_images, img_name+'_'+str(i)+'.'+img_type), image_back_to_org_scale)`
			`cv2.imwrite(os.path.join(dir_out_labels, img_name+'_'+str(i)+'.'+img_type), image)`


			`@main.command()`
			`@click.option(`
			`"--dir_xml",`
			`"-dx",`
			`help="directory of GT page-xml files",`
			`type=click.Path(exists=True, file_okay=False),`
			`)`

			`@click.option(`
			`"--dir_out_modal_image",`
			`"-domi",`
			`help="directory where ground truth images would be written",`
			`type=click.Path(exists=True, file_okay=False),`
			`)`

			`@click.option(`
			`"--dir_out_classes",`
			`"-docl",`
			`help="directory where ground truth classes would be written",`
			`type=click.Path(exists=True, file_okay=False),`
			`)`

			`@click.option(`
			`"--input_height",`
			`"-ih",`
min area size of text region passes as an argument for machine based reading order 2024-05-28 10:14:16 +02:00			`help="input height",`
machine based reading order training dataset generator is added 2024-05-24 14:42:58 +02:00			`)`
			`@click.option(`
			`"--input_width",`
			`"-iw",`
min area size of text region passes as an argument for machine based reading order 2024-05-28 10:14:16 +02:00			`help="input width",`
			`)`
			`@click.option(`
			`"--min_area_size",`
			`"-min",`
			`help="min area size of regions considered for reading order training.",`
machine based reading order training dataset generator is added 2024-05-24 14:42:58 +02:00			`)`

min area size of text region passes as an argument for machine based reading order 2024-05-28 10:14:16 +02:00			`def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size):`
machine based reading order training dataset generator is added 2024-05-24 14:42:58 +02:00			`xml_files_ind = os.listdir(dir_xml)`
			`input_height = int(input_height)`
			`input_width = int(input_width)`
min area size of text region passes as an argument for machine based reading order 2024-05-28 10:14:16 +02:00			`min_area = float(min_area_size)`
machine based reading order training dataset generator is added 2024-05-24 14:42:58 +02:00
			`indexer_start= 0#55166`
			`max_area = 1`
min area size of text region passes as an argument for machine based reading order 2024-05-28 10:14:16 +02:00			`#min_area = 0.0001`
machine based reading order training dataset generator is added 2024-05-24 14:42:58 +02:00
			`for ind_xml in tqdm(xml_files_ind):`
			`indexer = 0`
			`#print(ind_xml)`
			`#print('########################')`
			`xml_file = os.path.join(dir_xml,ind_xml )`
			`f_name = ind_xml.split('.')[0]`
			`file_name, id_paragraph, id_header,co_text_paragraph,\`
			`co_text_header,tot_region_ref,x_len, y_len,index_tot_regions,img_poly = read_xml(xml_file)`

			`id_all_text = id_paragraph + id_header`
			`co_text_all = co_text_paragraph + co_text_header`


			`_, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(co_text_header)`

			`img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8')`

			`for j in range(len(cy_main)):`
			`img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1`


			`texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ]`
			`texts_corr_order_index_int = [int(x) for x in texts_corr_order_index]`


			`co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area)`

			`arg_array = np.array(range(len(texts_corr_order_index_int)))`

			`labels_con = np.zeros((y_len,x_len,len(arg_array)),dtype='uint8')`
			`for i in range(len(co_text_all)):`
			`img_label = np.zeros((y_len,x_len,3),dtype='uint8')`
			`img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1))`

			`img_label[:,:,0][img_poly[:,:,0]==5] = 2`
			`img_label[:,:,0][img_header_and_sep[:,:]==1] = 3`

			`labels_con[:,:,i] = img_label[:,:,0]`

			`for i in range(len(texts_corr_order_index_int)):`
			`for j in range(len(texts_corr_order_index_int)):`
			`if i!=j:`
			`input_matrix = np.zeros((input_height,input_width,3)).astype(np.int8)`
			`final_f_name = f_name+'_'+str(indexer+indexer_start)`
			`order_class_condition = texts_corr_order_index_int[i]-texts_corr_order_index_int[j]`
			`if order_class_condition<0:`
			`class_type = 1`
			`else:`
			`class_type = 0`

			`input_matrix[:,:,0] = resize_image(labels_con[:,:,i], input_height, input_width)`
			`input_matrix[:,:,1] = resize_image(img_poly[:,:,0], input_height, input_width)`
			`input_matrix[:,:,2] = resize_image(labels_con[:,:,j], input_height, input_width)`

			`np.save(os.path.join(dir_out_classes,final_f_name+'.npy' ), class_type)`

			`cv2.imwrite(os.path.join(dir_out_modal_image,final_f_name+'.png' ), input_matrix)`
			`indexer = indexer+1`



			`if __name__ == "__main__":`
			`main()`