@main.command()
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out",
    "-do",
    help="directory where plots will be written",
    type=click.Path(exists=True, file_okay=False),
)
def visualize_reading_order(dir_xml, dir_out):
    """Plot the reading order stored in each page-xml file of ``dir_xml``.

    For every file in ``dir_xml`` the paragraph and header regions returned
    by ``read_xml`` are filled on a black canvas, and arrows are drawn from
    the centroid of each region to the centroid of the next region in
    reading order. The result is written to ``dir_out`` as a PNG named after
    the xml file (extension replaced by ``.png``).
    """
    # Colors are 3-channel tuples as interpreted by OpenCV when writing the
    # image (BGR channel order).
    region_fill_color = (255, 0, 0)
    arrow_color = (0, 0, 255)
    arrow_thickness = 20
    arrow_tip_length = 0.03

    for ind_xml in tqdm(os.listdir(dir_xml)):
        xml_file = os.path.join(dir_xml, ind_xml)
        # Strip only the final extension: splitting on the first dot would
        # map "a.b.xml" and "a.c.xml" to the same output file.
        f_name = os.path.splitext(ind_xml)[0]

        (
            _,
            _,
            _,
            _,
            id_paragraph,
            id_header,
            co_text_paragraph,
            co_text_header,
            tot_region_ref,
            x_len,
            y_len,
            index_tot_regions,
            _,
        ) = read_xml(xml_file)

        # Ids and contours are concatenated in the same order (paragraphs
        # first, then headers) so that they stay aligned element by element.
        id_all_text = id_paragraph + id_header
        co_text_all = co_text_paragraph + co_text_header

        # Only the centroids are needed here; the remaining outputs of the
        # helper are ignored.
        cx_main, cy_main, _, _, _, _, _ = find_new_features_of_contours(
            co_text_all
        )

        # tot_region_ref and index_tot_regions are parallel lists: look up
        # each region id and take the reading-order index at that position.
        reading_order_index = [
            int(index_tot_regions[tot_region_ref.index(region_id)])
            for region_id in id_all_text
        ]

        # Sort the centroids by reading-order index. Sorting on the index
        # alone (stable sort) keeps regions with equal indices in their
        # original relative order.
        centroids = [(int(cx), int(cy)) for cx, cy in zip(cx_main, cy_main)]
        ordered_centroids = [
            point
            for _, point in sorted(
                zip(reading_order_index, centroids), key=lambda pair: pair[0]
            )
        ]

        # Canvas has y_len rows and x_len columns. uint8 is sufficient: only
        # the values 0 and 255 are ever drawn.
        img = np.zeros((y_len, x_len, 3), dtype=np.uint8)
        img = cv2.fillPoly(img, pts=co_text_all, color=region_fill_color)

        # One arrow per consecutive pair of regions in reading order.
        for start_point, end_point in zip(
            ordered_centroids, ordered_centroids[1:]
        ):
            img = cv2.arrowedLine(
                img,
                start_point,
                end_point,
                arrow_color,
                arrow_thickness,
                tipLength=arrow_tip_length,
            )

        cv2.imwrite(os.path.join(dir_out, f_name + '.png'), img)