mirror of
https://github.com/qurator-spk/sbb_pixelwise_segmentation.git
synced 2025-08-17 21:49:57 +02:00
Add visualization of the OCR text stored in a PAGE-XML file
This commit is contained in:
parent
cf63bd92bc
commit
1fe31bdeb3
2 changed files with 152 additions and 0 deletions
|
@ -3,6 +3,7 @@ import json
|
||||||
from gt_gen_utils import *
|
from gt_gen_utils import *
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
def main():
|
def main():
|
||||||
|
@ -447,6 +448,86 @@ def visualize_layout_segmentation(xml_file, dir_xml, dir_out, dir_imgs):
|
||||||
cv2.imwrite(os.path.join(dir_out, f_name+'.png'), added_image)
|
cv2.imwrite(os.path.join(dir_out, f_name+'.png'), added_image)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@main.command()
@click.option(
    "--xml_file",
    "-xml",
    help="xml filename",
    type=click.Path(exists=True, dir_okay=False),
)
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out",
    "-do",
    help="directory where plots will be written",
    type=click.Path(exists=True, file_okay=False),
)
def visualize_ocr_text(xml_file, dir_xml, dir_out):
    """Render the OCR text of each text line of PAGE-XML files onto blank pages.

    For every input XML file a white RGB image with the page dimensions is
    created, the text of each text line is drawn centred inside the bounding
    rectangle of that line's contour, and the result is saved as
    ``<xml stem>.png`` in ``dir_out``.

    Either ``xml_file`` or ``dir_xml`` must be given; when both are given
    ``dir_xml`` takes precedence.
    """
    # Explicit checks instead of ``assert``: asserts vanish under ``python -O``.
    if not (xml_file or dir_xml):
        raise click.UsageError(
            "A single xml file -xml or a dir of xml files -dx is required")
    if not dir_out:
        raise click.UsageError("An output directory -do is required")

    if dir_xml:
        # Only hand XML files to the parser; anything else in the directory
        # would abort the whole batch with a parse error.
        xml_paths = sorted(
            os.path.join(dir_xml, name)
            for name in os.listdir(dir_xml)
            if name.lower().endswith('.xml')
        )
    else:
        xml_paths = [xml_file]

    font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
    # Load the font once up front so a missing font file fails before any
    # page is processed rather than in the middle of the batch.
    ImageFont.truetype(font_path, 40)

    for xml_path in tqdm(xml_paths):
        f_name = Path(xml_path).stem

        co_textlines, y_len, x_len, ocr_texts = get_textline_contours_and_ocr_text(xml_path)

        image_text = Image.new("RGB", (x_len, y_len), "white")
        draw = ImageDraw.Draw(image_text)

        for cnt, text in zip(co_textlines, ocr_texts):
            # Lines without text or without coordinates cannot be drawn.
            if not text or cnt.size == 0:
                continue

            x, y, w, h = cv2.boundingRect(cnt)

            # The text may use the full width of the line box but only 40%
            # of its height.
            font = fit_text_single_line(draw, text, font_path, w, int(h * 0.4))

            left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
            text_width = right - left
            text_height = bottom - top

            # Centre the text in the line box. The box returned by textbbox
            # for anchor (0, 0) does not necessarily start at (0, 0), so its
            # origin offset is subtracted to centre the visible glyphs.
            text_x = x + (w - text_width) // 2 - left
            text_y = y + (h - text_height) // 2 - top

            draw.text((text_x, text_y), text, fill="black", font=font)

        image_text.save(os.path.join(dir_out, f_name + '.png'))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -9,6 +9,7 @@ import cv2
|
||||||
from shapely import geometry
|
from shapely import geometry
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
|
||||||
KERNEL = np.ones((5, 5), np.uint8)
|
KERNEL = np.ones((5, 5), np.uint8)
|
||||||
|
@ -283,6 +284,76 @@ def get_textline_contours_for_visualization(xml_file):
|
||||||
return co_use_case, y_len, x_len
|
return co_use_case, y_len, x_len
|
||||||
|
|
||||||
|
|
||||||
|
def _textline_unicode_text(textline):
    """Return the text of the last TextEquiv/Unicode child of *textline* ('' if none)."""
    text = ''
    for child in textline:
        if child.tag.endswith("TextEquiv"):
            for sub in child:
                if sub.tag.endswith("Unicode"):
                    # An empty <Unicode/> element has text None; normalise to ''.
                    text = sub.text or ''
    return text


def get_textline_contours_and_ocr_text(xml_file):
    """Extract text line contours, page size and OCR text from an XML file.

    Args:
        xml_file: path of (or file object for) the XML document.

    Returns:
        A tuple ``(contours, y_len, x_len, texts)`` where

        * ``contours`` holds one numpy array per ``TextLine`` element: shape
          ``(1, N, 2)`` when the line's ``Coords`` element carries a
          ``points`` attribute, shape ``(N, 2)`` when the coordinates are
          given as ``Point`` elements, and an empty array when no
          coordinates were found;
        * ``y_len`` / ``x_len`` are the ``imageHeight`` / ``imageWidth``
          attributes of the ``Page`` element;
        * ``texts`` holds, per text line, the text of its
          ``TextEquiv/Unicode`` child (``''`` when missing or empty).

    Raises:
        ValueError: if the document has no ``Page`` element in the root
            element's namespace, or a ``points`` entry is malformed.
    """
    # No encoding override: the parser honours the encoding declared in the
    # document (UTF-8 by default). Forcing a single-byte encoding here would
    # garble every non-ASCII character of the OCR text.
    tree = ET.parse(xml_file)
    root = tree.getroot()
    all_tags = [elem.tag for elem in root.iter()]
    # Namespace of the root element in ElementTree's '{uri}' notation.
    link = all_tags[0].split('}')[0] + '}'
    coords_tag = link + 'Coords'
    point_tag = link + 'Point'

    y_len = x_len = None
    for page in root.iter(link + 'Page'):
        y_len = int(page.attrib['imageHeight'])
        x_len = int(page.attrib['imageWidth'])
    if y_len is None or x_len is None:
        raise ValueError("no Page element with image dimensions found in %r" % (xml_file,))

    textline_tags = sorted({tag for tag in all_tags if tag.endswith('}TextLine')})

    co_use_case = []
    ocr_textlines = []

    for tag in textline_tags:
        for textline in root.iter(tag):
            contour = []
            text = ''
            n_points = 0
            for node in textline.iter():
                if node.tag == coords_tag:
                    # The text is only read for lines that have a Coords element.
                    text = _textline_unicode_text(textline)

                    points = node.attrib.get('points')
                    if points:
                        # split() tolerates repeated whitespace and line
                        # breaks; int(float()) accepts float coordinates,
                        # matching the Point branch below.
                        pairs = []
                        for pair in points.split():
                            px, py = pair.split(',')[:2]
                            pairs.append([int(float(px)), int(float(py))])
                        contour.append(np.array(pairs))
                        # Only the first Coords of the line is used.
                        break

                if node.tag == point_tag:
                    contour.append([int(float(node.attrib['x'])), int(float(node.attrib['y']))])
                    n_points += 1
                elif n_points >= 1:
                    # First non-Point element after a run of Points: the
                    # outline of this line is complete.
                    break

            co_use_case.append(np.array(contour))
            ocr_textlines.append(text)
    return co_use_case, y_len, x_len, ocr_textlines
|
||||||
|
|
||||||
|
def fit_text_single_line(draw, text, font_path, max_width, max_height,
                         initial_font_size=50, min_font_size=10, step=2):
    """Return the largest font with which *text* fits into a box on one line.

    Font sizes are tried from ``initial_font_size`` downwards in decrements
    of ``step``; the first size whose rendered bounding box is at most
    ``max_width`` x ``max_height`` wins. If no size above ``min_font_size``
    fits, a font of size ``min_font_size`` is returned even though the text
    may then overflow the box.

    Args:
        draw: object providing ``textbbox((x, y), text, font=...)``, used to
            measure the text.
        text: the string to fit; ``None`` is treated as an empty string.
        font_path: font file passed to ``ImageFont.truetype``.
        max_width: maximum allowed text width.
        max_height: maximum allowed text height.
        initial_font_size: first (largest) font size to try.
        min_font_size: size of the fallback font; sizes at or below it are
            not tested.
        step: positive amount by which the size is reduced per attempt.

    Returns:
        The font object created by ``ImageFont.truetype``.

    Raises:
        ValueError: if ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")
    if text is None:
        text = ''

    for font_size in range(initial_font_size, min_font_size, -step):
        font = ImageFont.truetype(font_path, font_size)
        left, top, right, bottom = draw.textbbox((0, 0), text, font=font)  # Get text bounding box

        if right - left <= max_width and bottom - top <= max_height:
            return font  # Return the best-fitting font

    return ImageFont.truetype(font_path, min_font_size)  # Smallest font fallback
|
||||||
|
|
||||||
def get_layout_contours_for_visualization(xml_file):
|
def get_layout_contours_for_visualization(xml_file):
|
||||||
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding = 'iso-8859-5'))
|
tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding = 'iso-8859-5'))
|
||||||
root1=tree1.getroot()
|
root1=tree1.getroot()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue