mirror of
https://github.com/qurator-spk/sbb_pixelwise_segmentation.git
synced 2025-06-09 20:00:05 +02:00
machine-based reading-order training dataset generator is added
This commit is contained in:
parent
f5746011f6
commit
bf1468391a
4 changed files with 1433 additions and 820 deletions
194
generate_gt_for_training.py
Normal file
194
generate_gt_for_training.py
Normal file
|
@ -0,0 +1,194 @@
|
|||
import click
|
||||
import json
|
||||
from gt_gen_utils import *
|
||||
from tqdm import tqdm
|
||||
|
||||
@click.group()
def main():
    """Generate ground-truth data for training from PAGE-XML files and images.

    Each sub-command registered on this group produces one kind of
    training dataset.
    """
|
||||
|
||||
@main.command()
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out",
    "-do",
    help="directory where ground truth images would be written",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--config",
    "-cfg",
    help="config file of preferred layout or use case.",
    type=click.Path(exists=True, dir_okay=False),
)
@click.option(
    "--type_output",
    "-to",
    help="this defines how output should be. A 2d image array or a 3d image array encoded with RGB color. Just pass 2d or 3d. The file will be saved one directory up. 2D image array is 3d but only information of one channel would be enough since all channels have the same values.",
)
def pagexml2label(dir_xml, dir_out, type_output, config):
    """Write ground-truth label images for the PAGE-XML files in a directory.

    The optional JSON config is loaded and handed, together with the file
    listing of the XML directory, to ``get_images_of_ground_truth``.
    """
    if config:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config, encoding="utf-8") as f:
            config_params = json.load(f)
    else:
        config_params = None

    gt_list = get_content_of_dir(dir_xml)
    # Both the config path and its parsed content are forwarded, as before.
    get_images_of_ground_truth(gt_list, dir_xml, dir_out, type_output, config, config_params)
|
||||
|
||||
@main.command()
@click.option(
    "--dir_imgs",
    "-dis",
    help="directory of images with high resolution.",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out_images",
    "-dois",
    help="directory where degraded images will be written.",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out_labels",
    "-dols",
    help="directory where original images will be written as labels.",
    type=click.Path(exists=True, file_okay=False),
)
def image_enhancement(dir_imgs, dir_out_images, dir_out_labels):
    """Create degraded/original image pairs from a directory of images.

    Every readable image is downscaled by each factor of a fixed list and
    scaled back to its original size. The result is written to the
    degraded-image directory and the untouched original is written under
    the same file name to the label directory.
    """
    ls_scales = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]

    for img in tqdm(os.listdir(dir_imgs)):
        # splitext keeps dots inside the stem ("page.001.jpg" -> "page.001", ".jpg")
        # and does not fail on names without any dot.
        img_name, img_ext = os.path.splitext(img)

        image = cv2.imread(os.path.join(dir_imgs, img))
        if image is None:
            # cv2.imread yields None for anything it cannot decode
            # (sub-directories, hidden files, non-image files).
            click.echo("skipping " + img + ": not a readable image", err=True)
            continue

        # cv2.imwrite picks the encoder from the extension, so one is required.
        img_ext = img_ext or ".png"
        height, width = image.shape[0], image.shape[1]

        for i, scale in enumerate(ls_scales):
            image_down_scaled = resize_image(image, int(height * scale), int(width * scale))
            image_back_to_org_scale = resize_image(image_down_scaled, height, width)

            out_name = img_name + '_' + str(i) + img_ext
            cv2.imwrite(os.path.join(dir_out_images, out_name), image_back_to_org_scale)
            cv2.imwrite(os.path.join(dir_out_labels, out_name), image)
|
||||
|
||||
|
||||
@main.command()
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out_modal_image",
    "-domi",
    help="directory where ground truth images would be written",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out_classes",
    "-docl",
    help="directory where ground truth classes would be written",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--input_height",
    "-ih",
    help="input_height",
    type=int,
    required=True,
)
@click.option(
    "--input_width",
    "-iw",
    help="input_width",
    type=int,
    required=True,
)
def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width):
    """Write pairwise reading-order training samples for PAGE-XML files.

    For every ordered pair (i, j) of distinct text regions of a page, a
    3-channel PNG and a .npy class file are written under the same name.
    Channel 0 holds the label map of region i, channel 1 the first channel
    of the page layout image and channel 2 the label map of region j.
    The class is 1 when region i has the smaller reading-order index and
    0 otherwise.
    """
    # Harmless for ints coming from click; kept so string values still work.
    input_height = int(input_height)
    input_width = int(input_width)

    indexer_start = 0  # offset added to every per-file sample index
    max_area = 1
    min_area = 0.0001

    for ind_xml in tqdm(os.listdir(dir_xml)):
        indexer = 0
        xml_file = os.path.join(dir_xml, ind_xml)
        # splitext keeps dots inside the stem, so "a.1.xml" and "a.2.xml"
        # no longer share the output prefix "a".
        f_name = os.path.splitext(ind_xml)[0]

        (file_name, id_paragraph, id_header, co_text_paragraph, co_text_header,
         tot_region_ref, x_len, y_len, index_tot_regions, img_poly) = read_xml(xml_file)

        id_all_text = id_paragraph + id_header
        co_text_all = co_text_paragraph + co_text_header

        _, _, x_min_main, x_max_main, _, y_max_main, _ = find_new_features_of_contours(co_text_header)

        # Mark a 12-pixel-high band directly below each header bounding box.
        # Assumes the returned sequences have equal length -- TODO confirm.
        img_header_and_sep = np.zeros((y_len, x_len), dtype='uint8')
        for x_min, x_max, y_max in zip(x_min_main, x_max_main, y_max_main):
            img_header_and_sep[int(y_max):int(y_max) + 12, int(x_min):int(x_max)] = 1

        # Reading-order index of every text region, in the order of id_all_text.
        texts_corr_order_index_int = [
            int(index_tot_regions[tot_region_ref.index(region_id)])
            for region_id in id_all_text
        ]

        co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(
            img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area)

        # These maps do not depend on the (i, j) pair: resize each one once
        # instead of once per pair.
        img_poly_resized = resize_image(img_poly[:, :, 0], input_height, input_width)

        # Assumes the filter returns contours and indices of equal length
        # -- TODO confirm against filter_contours_area_of_image.
        region_labels = []
        for contour in co_text_all:
            img_label = np.zeros((y_len, x_len), dtype='uint8')
            img_label = cv2.fillPoly(img_label, pts=[contour], color=1)  # the region itself
            img_label[img_poly[:, :, 0] == 5] = 2                        # layout class 5 of img_poly
            img_label[img_header_and_sep == 1] = 3                       # band below headers
            region_labels.append(resize_image(img_label, input_height, input_width))

        n_regions = len(texts_corr_order_index_int)
        for i in range(n_regions):
            for j in range(n_regions):
                if i == j:
                    continue

                # NOTE(review): int8 cannot hold values above 127 and PNG
                # output is documented for 8-bit unsigned data; kept as in the
                # original so generated data stays unchanged -- confirm uint8
                # is not what was intended.
                input_matrix = np.zeros((input_height, input_width, 3), dtype=np.int8)
                input_matrix[:, :, 0] = region_labels[i]
                input_matrix[:, :, 1] = img_poly_resized
                input_matrix[:, :, 2] = region_labels[j]

                if texts_corr_order_index_int[i] < texts_corr_order_index_int[j]:
                    class_type = 1
                else:
                    class_type = 0

                final_f_name = f_name + '_' + str(indexer + indexer_start)
                np.save(os.path.join(dir_out_classes, final_f_name + '.npy'), class_type)
                cv2.imwrite(os.path.join(dir_out_modal_image, final_f_name + '.png'), input_matrix)
                indexer += 1
|
||||
|
||||
|
||||
|
||||
# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue