remove (half-implemented) page_alto functionality

2026-06-16 09:59:13 +02:00 · 2026-06-11 17:04:56 +02:00 · 2026-06-11 17:04:56 +02:00 · bed7fe526b
commit bed7fe526b
parent 9858221724
3 changed files with 169 additions and 291 deletions
--- a/src/eynollah/training/extract_line_gt.py
+++ b/src/eynollah/training/extract_line_gt.py
@ -56,12 +56,6 @@ from ..utils import is_image_filename
    is_flag=True,
    help="if this parameter set to true, vertical textline images will be excluded.",
 )
@click.option(
    "--page_alto",
    "-alto",
    is_flag=True,
    help="If this parameter is set to True, text line image cropping and text extraction are performed using PAGE/ALTO files. Otherwise, the default method for PAGE XML files is used.",
 )
 def linegt_cli(
    image,
    dir_in,
@ -70,7 +64,6 @@ def linegt_cli(
    pref_of_dataset,
    do_not_mask_with_textline_contour,
    exclude_vertical_lines,
    page_alto,
 ):
    assert bool(dir_in) ^ bool(image), "Set --dir-in or --image-filename, not both"
    if dir_in:
@ -86,84 +79,6 @@ def linegt_cli(
        dir_xml = os.path.join(dir_xmls, file_name + '.xml')
        img = cv2.imread(dir_img)
        if page_alto:
            h, w = img.shape[:2]
            tree = ET.parse(dir_xml)
            root = tree.getroot()
            NS = {'alto': root.tag.split('}')[0].strip('{')}#{"alto": "http://www.loc.gov/standards/alto/ns-v4#"}
            results = []
            indexer_textlines = 0
            for line in root.findall(".//alto:TextLine", NS):
                string_el = line.find("alto:String", NS)
                textline_text = string_el.attrib["CONTENT"] if string_el is not None else None
                polygon_el = line.find("alto:Shape/alto:Polygon", NS)
                if polygon_el is None:
                    continue
                points = polygon_el.attrib["POINTS"].split()
                coords = [
                    (int(points[i]), int(points[i + 1]))
                    for i in range(0, len(points), 2)
                ]
                coords = np.array(coords, dtype=np.int32)
                x, y, w, h = cv2.boundingRect(coords)
                if exclude_vertical_lines and h > 1.4 * w:
                    img_crop = None
                    continue
                img_poly_on_img = np.copy(img)
                mask_poly = np.zeros(img.shape)
                mask_poly = cv2.fillPoly(mask_poly, pts=[coords], color=(1, 1, 1))
                mask_poly = mask_poly[y : y + h, x : x + w, :]
                img_crop = img_poly_on_img[y : y + h, x : x + w, :]
                if not do_not_mask_with_textline_contour:
                    img_crop[mask_poly == 0] = 255
                if img_crop.shape[0] == 0 or img_crop.shape[1] == 0:
                    img_crop = None
                    continue
                if textline_text and img_crop is not None:
                    base_name = os.path.join(
                        dir_out, file_name + '_line_' + str(indexer_textlines)
                    )
                    if pref_of_dataset:
                        base_name += '_' + pref_of_dataset
                    if not do_not_mask_with_textline_contour:
                        base_name += '_masked'
                    with open(base_name + '.txt', 'w') as text_file:
                        text_file.write(textline_text)
                    cv2.imwrite(base_name + '.png', img_crop)
                    indexer_textlines += 1
        else:
        total_bb_coordinates = []
        tree = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8"))
--- a/src/eynollah/training/generate_gt_for_training.py
+++ b/src/eynollah/training/generate_gt_for_training.py
@ -73,14 +73,8 @@ def main():
    is_flag=True,
    help="if this parameter set to true, generated labels and in the case of provided org images cropping will be imposed and cropped labels and images will be written in output directories.",
 )
@click.option(
    "--page_alto",
    "-alto",
    is_flag=True,
    help="If this parameter is set to True, textline label generation is performed using PAGE/ALTO files. Otherwise, the default method for PAGE XML files is used.",
 )
-def pagexml2label(dir_xml,dir_out,type_output,config, printspace, dir_images, dir_out_images, page_alto):
+def pagexml2label(dir_xml,dir_out,type_output,config, printspace, dir_images, dir_out_images):
    if config:
        with open(config) as f:
            config_params = json.load(f)
@ -88,7 +82,7 @@ def pagexml2label(dir_xml,dir_out,type_output,config, printspace, dir_images, di
        print("passed")
        config_params = None
    gt_list = get_content_of_dir(dir_xml)
-    get_images_of_ground_truth(gt_list,dir_xml,dir_out,type_output, config, config_params, printspace, dir_images, dir_out_images, page_alto)
+    get_images_of_ground_truth(gt_list,dir_xml,dir_out,type_output, config, config_params, printspace, dir_images, dir_out_images)
@main.command()
@click.option(
--- a/src/eynollah/training/gt_gen_utils.py
+++ b/src/eynollah/training/gt_gen_utils.py
@ -686,7 +686,7 @@ def get_layout_contours_for_visualization(xml_file):
                co_noise.append(np.array(c_t_in))
    return co_text, co_graphic, co_sep, co_img, co_table, co_map, co_music, co_noise, y_len, x_len
-def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images, page_alto=False):
+def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_file, config_params, printspace, dir_images, dir_out_images):
    """
    Reading the page xml files and write the ground truth images into given output directory.
    """
@ -699,19 +699,6 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
        print(gt_list[index])
        try:
            if page_alto:
                tree = ET.parse(dir_in+'/'+gt_list[index])
                root = tree.getroot()
                NS = {'alto': root.tag.split('}')[0].strip('{')}#{"alto": "http://www.loc.gov/standards/alto/ns-v4#"}
                x_len, y_len = 0, 0
                page = root.find('.//alto:Page', NS)
                x_len = int( page.get("WIDTH") )
                y_len = int( page.get("HEIGHT") )
            else:
            tree1 = ET.parse(dir_in+'/'+gt_list[index], parser = ET.XMLParser(encoding='utf-8'))
            root1=tree1.getroot()
            alltags=[elem.tag for elem in root1.iter()]
@ -797,24 +784,6 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_
                textline_rgb_color = (255, 0, 0)
                if page_alto:
                    co_use_case = []
                    for line in root.findall(".//alto:TextLine", NS):
                        string_el = line.find("alto:String", NS)
                        textline_text = string_el.attrib["CONTENT"] if string_el is not None else None
                        polygon_el = line.find("alto:Shape/alto:Polygon", NS)
                        if polygon_el is None:
                            continue
                        points = polygon_el.attrib["POINTS"].split()
                        coords = [
                            (int(points[i]), int(points[i + 1]))
                            for i in range(0, len(points), 2)
                        ]
                        co_use_case.append( np.array(coords, dtype=np.int32) )
                else:
                if config_params['use_case']=='textline':
                    region_tags = np.unique([x for x in alltags if x.endswith('TextLine')])
                elif config_params['use_case']=='word':