@main.command()
@click.option(
    "--xml_file",
    "-xml",
    help="xml filename",
    type=click.Path(exists=True, dir_okay=False),
)
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out",
    "-do",
    help="directory where plots will be written",
    type=click.Path(exists=True, file_okay=False),
    required=True,
)
def visualize_ocr_text(xml_file, dir_xml, dir_out):
    """Render the OCR text of every text line of PAGE-XML files onto white pages.

    One PNG per input file, named after the xml file, is written to dir_out.
    """
    # NOTE: click shows the docstring above as the command's --help text,
    # which is why it is kept short; details live in the comments below.
    #
    # xml_file: path of a single PAGE-XML file (used when dir_xml is not given).
    # dir_xml:  directory of PAGE-XML files; takes precedence over xml_file.
    # dir_out:  existing directory that receives "<xml stem>.png" images.
    if not (xml_file or dir_xml):
        # A usage error instead of `assert`: asserts vanish under `python -O`.
        raise click.UsageError(
            "A single xml file (-xml) or a directory of xml files (-dx) is required"
        )

    if dir_xml:
        # Only hand actual xml files to the parser; sorting makes the
        # processing order reproducible between runs.
        xml_paths = sorted(
            os.path.join(dir_xml, name)
            for name in os.listdir(dir_xml)
            if name.lower().endswith(".xml")
        )
    else:
        xml_paths = [xml_file]

    font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
    # Load the font once up front so a missing font file fails immediately
    # rather than after the first page has already been parsed.
    ImageFont.truetype(font_path, 40)

    for xml_path in tqdm(xml_paths):
        f_name = Path(xml_path).stem

        co_textlines, y_len, x_len, ocr_texts = get_textline_contours_and_ocr_text(xml_path)

        image_text = Image.new("RGB", (x_len, y_len), "white")
        draw = ImageDraw.Draw(image_text)

        for cnt, text in zip(co_textlines, ocr_texts):
            # Nothing to draw for lines without text, and no box to draw
            # into for lines without coordinates.
            if not text or len(cnt) == 0:
                continue

            x, y, w, h = cv2.boundingRect(cnt)

            # The text may use the full line width but only 40% of its height.
            font = fit_text_single_line(draw, text, font_path, w, int(h * 0.4))

            left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
            text_width = right - left
            text_height = bottom - top

            # Centre the rendered glyph box inside the line's bounding box.
            # textbbox() reports the glyph box relative to the drawing origin,
            # so its left/top offset has to be subtracted again.
            text_x = x + (w - text_width) // 2 - left
            text_y = y + (h - text_height) // 2 - top

            draw.text((text_x, text_y), text, fill="black", font=font)

        image_text.save(os.path.join(dir_out, f_name + ".png"))
def _parse_points_attribute(raw_points):
    """Turn a PAGE ``points`` string ("x1,y1 x2,y2 ...") into an (N, 2) int array."""
    pairs = []
    # split() without argument tolerates repeated and trailing whitespace.
    for pair in raw_points.split():
        x_str, y_str = pair.split(',')[:2]
        # int(float(...)) also accepts coordinates written as "12.0".
        pairs.append([int(float(x_str)), int(float(y_str))])
    return np.array(pairs)


def _textline_unicode(textline):
    """Return the text of the last TextEquiv/Unicode child of *textline* ('' if none)."""
    text = ''
    for child in textline:
        if child.tag.endswith("TextEquiv"):
            for sub in child:
                if sub.tag.endswith("Unicode"):
                    # An empty <Unicode/> element has text None.
                    text = sub.text or ''
    return text


def _textline_contour(textline, link):
    """Return the outline of *textline* as a numpy array.

    A ``Coords`` element carrying a ``points`` attribute yields an array of
    shape (1, N, 2).  Otherwise the first consecutive run of ``Point``
    elements found below the text line yields an array of shape (N, 2);
    when neither is present the array is empty.
    """
    coords_tag = link + 'Coords'
    point_tag = link + 'Point'
    points = []
    for elem in textline.iter():
        if elem.tag == point_tag:
            points.append([int(float(elem.attrib['x'])), int(float(elem.attrib['y']))])
            continue
        if points:
            # The run of Point elements has ended.
            break
        if elem.tag == coords_tag:
            raw_points = elem.attrib.get('points')
            if raw_points:
                return np.array([_parse_points_attribute(raw_points)])
    return np.array(points)


def get_textline_contours_and_ocr_text(xml_file):
    """Read text line outlines and their OCR text from a PAGE-XML file.

    Args:
        xml_file: path of the PAGE-XML file.

    Returns:
        A tuple ``(contours, y_len, x_len, texts)``: ``contours`` is a list
        with one numpy array per ``TextLine`` element, ``y_len`` / ``x_len``
        are the ``imageHeight`` / ``imageWidth`` attributes of the ``Page``
        element as ints, and ``texts`` is a list, parallel to ``contours``,
        holding the Unicode text of each line ('' when a line has none).

    Raises:
        ValueError: if the file contains no ``Page`` element.
    """
    # Use the default parser so the encoding declared in the file is honoured.
    # Forcing a fixed single-byte encoding would override that declaration and
    # garble every non-ASCII character of the OCR text.
    root = ET.parse(xml_file).getroot()
    all_tags = [elem.tag for elem in root.iter()]
    # Namespace prefix in ElementTree notation, e.g. "{http://...}".
    link = all_tags[0].split('}')[0] + '}'

    y_len = x_len = None
    for page in root.iter(link + 'Page'):
        y_len = int(page.attrib['imageHeight'])
        x_len = int(page.attrib['imageWidth'])
    if y_len is None:
        raise ValueError(f"no Page element found in {xml_file}")

    textline_tags = sorted({tag for tag in all_tags if tag.endswith('}TextLine')})

    co_use_case = []
    ocr_textlines = []
    for tag in textline_tags:
        for textline in root.iter(tag):
            co_use_case.append(_textline_contour(textline, link))
            ocr_textlines.append(_textline_unicode(textline))
    return co_use_case, y_len, x_len, ocr_textlines
# Fonts already loaded, keyed by (font_path, font_size).  The size search in
# fit_text_single_line asks for the same few sizes for every text line, so
# each one is read from disk only once.
_FONT_CACHE = {}


def _load_font(font_path, font_size):
    """Return the font at *font_size*, loading each (path, size) pair only once."""
    key = (font_path, font_size)
    font = _FONT_CACHE.get(key)
    if font is None:
        font = _FONT_CACHE[key] = ImageFont.truetype(font_path, font_size)
    return font


def fit_text_single_line(draw, text, font_path, max_width, max_height, *,
                         initial_font_size=50, min_font_size=10, step=2):
    """Pick the largest font size at which *text* fits into a box on one line.

    Sizes are tried from ``initial_font_size`` downwards in decrements of
    ``step`` while they stay above ``min_font_size``; the first size whose
    rendered text is at most ``max_width`` wide and ``max_height`` high wins.

    Args:
        draw: object providing ``textbbox((x, y), text, font=...)``, used to
            measure the rendered text.
        text: the text to fit; ``None`` is treated as an empty string.
        font_path: font file passed to ``ImageFont.truetype``.
        max_width: maximum allowed width of the rendered text.
        max_height: maximum allowed height of the rendered text.
        initial_font_size: first (largest) size that is tried.
        min_font_size: size returned when no tried size fits.
        step: decrement between tried sizes; must be positive.

    Returns:
        The font object for the chosen size.  When nothing fits, the font at
        ``min_font_size`` is returned without checking that it fits.

    Raises:
        ValueError: if ``step`` is not positive.
    """
    if step <= 0:
        # A non-positive step would never reach min_font_size.
        raise ValueError("step must be positive")
    if text is None:
        text = ''

    font_size = initial_font_size
    while font_size > min_font_size:
        font = _load_font(font_path, font_size)
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
        if right - left <= max_width and bottom - top <= max_height:
            return font  # Largest size tried so far that fits
        font_size -= step  # Reduce font size and retry

    return _load_font(font_path, min_font_size)  # Smallest font fallback
'iso-8859-5')) root1=tree1.getroot()