Enable PAGE/ALTO-based label generation for textlines

This commit is contained in:
vahidrezanezhad 2026-03-03 21:12:20 +01:00
parent 4b80e45d91
commit f1d8257496
3 changed files with 211 additions and 171 deletions

View file

@ -92,7 +92,7 @@ def linegt_cli(
tree = ET.parse(dir_xml) tree = ET.parse(dir_xml)
root = tree.getroot() root = tree.getroot()
NS = {"alto": "http://www.loc.gov/standards/alto/ns-v4#"} NS = {'alto': root.tag.split('}')[0].strip('{')}#{"alto": "http://www.loc.gov/standards/alto/ns-v4#"}
results = [] results = []

View file

@ -73,8 +73,14 @@ def main():
is_flag=True, is_flag=True,
help="if this parameter set to true, generated labels and in the case of provided org images cropping will be imposed and cropped labels and images will be written in output directories.", help="if this parameter set to true, generated labels and in the case of provided org images cropping will be imposed and cropped labels and images will be written in output directories.",
) )
@click.option(
"--page_alto",
"-alto",
is_flag=True,
help="If this parameter is set to True, textline label generation is performed using PAGE/ALTO files. Otherwise, the default method for PAGE XML files is used.",
)
def pagexml2label(dir_xml,dir_out,type_output,config, printspace, dir_images, dir_out_images): def pagexml2label(dir_xml,dir_out,type_output,config, printspace, dir_images, dir_out_images, page_alto):
if config: if config:
with open(config) as f: with open(config) as f:
config_params = json.load(f) config_params = json.load(f)
@ -82,7 +88,7 @@ def pagexml2label(dir_xml,dir_out,type_output,config, printspace, dir_images, di
print("passed") print("passed")
config_params = None config_params = None
gt_list = get_content_of_dir(dir_xml) gt_list = get_content_of_dir(dir_xml)
get_images_of_ground_truth(gt_list,dir_xml,dir_out,type_output, config, config_params, printspace, dir_images, dir_out_images) get_images_of_ground_truth(gt_list,dir_xml,dir_out,type_output, config, config_params, printspace, dir_images, dir_out_images, page_alto)
@main.command() @main.command()
@click.option( @click.option(

View file

@ -686,7 +686,7 @@ def get_layout_contours_for_visualization(xml_file):
co_noise.append(np.array(c_t_in)) co_noise.append(np.array(c_t_in))
return co_text, co_graphic, co_sep, co_img, co_table, co_map, co_music, co_noise, y_len, x_len return co_text, co_graphic, co_sep, co_img, co_table, co_map, co_music, co_noise, y_len, x_len
def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images): def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images, page_alto=False):
""" """
Reading the page xml files and write the ground truth images into given output directory. Reading the page xml files and write the ground truth images into given output directory.
""" """
@ -696,8 +696,22 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
ls_org_imgs = os.listdir(dir_images) ls_org_imgs = os.listdir(dir_images)
ls_org_imgs_stem = [os.path.splitext(item)[0] for item in ls_org_imgs] ls_org_imgs_stem = [os.path.splitext(item)[0] for item in ls_org_imgs]
for index in tqdm(range(len(gt_list))): for index in tqdm(range(len(gt_list))):
#try:
print(gt_list[index]) print(gt_list[index])
try:
if page_alto:
tree = ET.parse(dir_in+'/'+gt_list[index])
root = tree.getroot()
NS = {'alto': root.tag.split('}')[0].strip('{')}#{"alto": "http://www.loc.gov/standards/alto/ns-v4#"}
x_len, y_len = 0, 0
page = root.find('.//alto:Page', NS)
x_len = int( page.get("WIDTH") )
y_len = int( page.get("HEIGHT") )
else:
tree1 = ET.parse(dir_in+'/'+gt_list[index], parser = ET.XMLParser(encoding='utf-8')) tree1 = ET.parse(dir_in+'/'+gt_list[index], parser = ET.XMLParser(encoding='utf-8'))
root1=tree1.getroot() root1=tree1.getroot()
alltags=[elem.tag for elem in root1.iter()] alltags=[elem.tag for elem in root1.iter()]
@ -783,6 +797,24 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
textline_rgb_color = (255, 0, 0) textline_rgb_color = (255, 0, 0)
if page_alto:
co_use_case = []
for line in root.findall(".//alto:TextLine", NS):
string_el = line.find("alto:String", NS)
textline_text = string_el.attrib["CONTENT"] if string_el is not None else None
polygon_el = line.find("alto:Shape/alto:Polygon", NS)
if polygon_el is None:
continue
points = polygon_el.attrib["POINTS"].split()
coords = [
(int(points[i]), int(points[i + 1]))
for i in range(0, len(points), 2)
]
co_use_case.append( np.array(coords, dtype=np.int32) )
else:
if config_params['use_case']=='textline': if config_params['use_case']=='textline':
region_tags = np.unique([x for x in alltags if x.endswith('TextLine')]) region_tags = np.unique([x for x in alltags if x.endswith('TextLine')])
elif config_params['use_case']=='word': elif config_params['use_case']=='word':
@ -879,6 +911,8 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
cv2.imwrite(os.path.join(dir_out_images, org_image_name), img_org) cv2.imwrite(os.path.join(dir_out_images, org_image_name), img_org)
except:
pass
if config_file and config_params['use_case']=='layout': if config_file and config_params['use_case']=='layout':
keys = list(config_params.keys()) keys = list(config_params.keys())