move line-gt extraction out of ocr to eynollah-training

2026-03-13 02:31:56 +01:00 · 2025-11-28 12:09:50 +01:00 · 2025-11-28 12:09:50 +01:00 · 30f9c695dc
commit 30f9c695dc
parent 951bd2fce6
4 changed files with 500 additions and 429 deletions
--- a/src/eynollah/cli/cli_ocr.py
+++ b/src/eynollah/cli/cli_ocr.py
@ -59,12 +59,6 @@ import click
    is_flag=True,
    help="if this parameter set to true, transformer ocr will be applied, otherwise cnn_rnn model.",
 )
-@click.option(
-    "--export_textline_images_and_text",
-    "-etit/-noetit",
-    is_flag=True,
-    help="if this parameter set to true, images and text in xml will be exported into output dir. This files can be used for training a OCR engine.",
-)
@click.option(
    "--do_not_mask_with_textline_contour",
    "-nmtc/-mtc",
@ -76,11 +70,6 @@ import click
    "-bs",
    help="number of inference batch size. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively",
 )
-@click.option(
-    "--dataset_abbrevation",
-    "-ds_pref",
-    help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset",
-)
@click.option(
    "--min_conf_value_of_textline_text",
    "-min_conf",
@ -97,7 +86,6 @@ def ocr_cli(
    dir_out_image_text,
    overwrite,
    tr_ocr,
-    export_textline_images_and_text,
    do_not_mask_with_textline_contour,
    batch_size,
    dataset_abbrevation,
@ -106,18 +94,11 @@ def ocr_cli(
    """
    Recognize text with a CNN/RNN or transformer ML model.
    """
-    assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text  -etit can not be set alongside transformer ocr -tr_ocr"
-    # FIXME: refactor: move export_textline_images_and_text out of eynollah.py
-    # assert not export_textline_images_and_text or not model, "Exporting textline and text  -etit can not be set alongside model -m"
-    assert not export_textline_images_and_text or not batch_size, "Exporting textline and text  -etit can not be set alongside batch size -bs"
-    assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text  -etit can not be set alongside directory of bin images -dib"
-    assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text  -etit can not be set alongside directory of images with predicted text -doit"
-    assert bool(image) != bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both."
+    assert bool(image) ^ bool(dir_in), "Either -i (single image) or -di (directory) must be provided, but not both."
    from ..eynollah_ocr import Eynollah_ocr
    eynollah_ocr = Eynollah_ocr(
        model_zoo=ctx.obj.model_zoo,
        tr_ocr=tr_ocr,
-        export_textline_images_and_text=export_textline_images_and_text,
        do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
        batch_size=batch_size,
        pref_of_dataset=dataset_abbrevation,
--- a/src/eynollah/eynollah_ocr.py
+++ b/src/eynollah/eynollah_ocr.py
@ -9,17 +9,13 @@ from logging import Logger, getLogger
 from typing import Optional
 from pathlib import Path
 import os
-import json
 import gc
 import sys
 import math
 import time

-from keras.layers import StringLookup
 import cv2
 import xml.etree.ElementTree as ET
-import tensorflow as tf
-from keras.models import load_model
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 from eynollah.model_zoo import EynollahModelZoo
@ -48,11 +44,6 @@ if sys.version_info < (3, 10):
 else:
    import importlib.resources as importlib_resources

-try:
-    from transformers import TrOCRProcessor, VisionEncoderDecoderModel
-except ImportError:
-    TrOCRProcessor = VisionEncoderDecoderModel = None
-
 class Eynollah_ocr:
    def __init__(
        self,
@ -60,27 +51,16 @@ class Eynollah_ocr:
        model_zoo: EynollahModelZoo,
        tr_ocr=False,
        batch_size: Optional[int]=None,
-        export_textline_images_and_text: bool=False,
        do_not_mask_with_textline_contour: bool=False,
-        pref_of_dataset=None,
        min_conf_value_of_textline_text : Optional[float]=None,
        logger: Optional[Logger]=None,
    ):
        self.tr_ocr = tr_ocr
-        # For generating textline-image pairs for traning, move to generate_gt_for_training
-        self.export_textline_images_and_text = export_textline_images_and_text
        # masking for OCR and GT generation, relevant for skewed lines and bounding boxes
        self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
-        # prefix or dataset
-        self.pref_of_dataset = pref_of_dataset
        self.logger = logger if logger else getLogger('eynollah.ocr')
        self.model_zoo = model_zoo
        
-        # TODO: Properly document what 'export_textline_images_and_text' is about
-        if export_textline_images_and_text:
-            self.logger.info("export_textline_images_and_text was set, so no actual models are loaded")
-            return
-
        self.min_conf_value_of_textline_text = min_conf_value_of_textline_text if min_conf_value_of_textline_text else 0.3
        self.b_s = 2 if batch_size is None and tr_ocr else 8 if batch_size is None else batch_size

@ -539,40 +519,55 @@ class Eynollah_ocr:
                                    mask_poly = mask_poly[y:y+h, x:x+w, :]
                                    img_crop = img_poly_on_img[y:y+h, x:x+w, :]
                                    
-                                    if self.export_textline_images_and_text:
+                                    # print(file_name, angle_degrees, w*h,
+                                    #       mask_poly[:,:,0].sum(),
+                                    #       mask_poly[:,:,0].sum() /float(w*h) ,
+                                    #       'didi')
+                                    
+                                    if angle_degrees > 3:
+                                        better_des_slope = get_orientation_moments(textline_coords)
+                                        
+                                        img_crop = rotate_image_with_padding(img_crop, better_des_slope)
+                                        if dir_in_bin is not None:
+                                            img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope)
+                                            
+                                        mask_poly = rotate_image_with_padding(mask_poly, better_des_slope)
+                                        mask_poly = mask_poly.astype('uint8')
+                                        
+                                        #new bounding box
+                                        x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0])
+                                        
+                                        mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                                        img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                                            
                                        if not self.do_not_mask_with_textline_contour:
                                            img_crop[mask_poly==0] = 255
-                                        
-                                    else:
-                                        # print(file_name, angle_degrees, w*h,
-                                        #       mask_poly[:,:,0].sum(),
-                                        #       mask_poly[:,:,0].sum() /float(w*h) ,
-                                        #       'didi')
-                                        
-                                        if angle_degrees > 3:
-                                            better_des_slope = get_orientation_moments(textline_coords)
-                                            
-                                            img_crop = rotate_image_with_padding(img_crop, better_des_slope)
-                                            if dir_in_bin is not None:
-                                                img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope)
-                                                
-                                            mask_poly = rotate_image_with_padding(mask_poly, better_des_slope)
-                                            mask_poly = mask_poly.astype('uint8')
-                                            
-                                            #new bounding box
-                                            x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0])
-                                            
-                                            mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                                            img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                                                
+                                        if dir_in_bin is not None:
+                                            img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
                                            if not self.do_not_mask_with_textline_contour:
-                                                img_crop[mask_poly==0] = 255
+                                                img_crop_bin[mask_poly==0] = 255
+                                        
+                                        if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
                                            if dir_in_bin is not None:
-                                                img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
-                                                if not self.do_not_mask_with_textline_contour:
-                                                    img_crop_bin[mask_poly==0] = 255
-                                            
-                                            if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
+                                                img_crop, img_crop_bin = \
+                                                    break_curved_line_into_small_pieces_and_then_merge(
+                                                        img_crop, mask_poly, img_crop_bin)
+                                            else:
+                                                img_crop, _ = \
+                                                    break_curved_line_into_small_pieces_and_then_merge(
+                                                        img_crop, mask_poly)
+    
+                                    else:
+                                        better_des_slope = 0
+                                        if not self.do_not_mask_with_textline_contour:
+                                            img_crop[mask_poly==0] = 255
+                                        if dir_in_bin is not None:
+                                            if not self.do_not_mask_with_textline_contour:
+                                                img_crop_bin[mask_poly==0] = 255
+                                        if type_textregion=='drop-capital':
+                                            pass
+                                        else:
+                                            if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
                                                if dir_in_bin is not None:
                                                    img_crop, img_crop_bin = \
                                                        break_curved_line_into_small_pieces_and_then_merge(
@ -581,188 +576,178 @@ class Eynollah_ocr:
                                                    img_crop, _ = \
                                                        break_curved_line_into_small_pieces_and_then_merge(
                                                            img_crop, mask_poly)
-        
-                                        else:
-                                            better_des_slope = 0
-                                            if not self.do_not_mask_with_textline_contour:
-                                                img_crop[mask_poly==0] = 255
-                                            if dir_in_bin is not None:
-                                                if not self.do_not_mask_with_textline_contour:
-                                                    img_crop_bin[mask_poly==0] = 255
-                                            if type_textregion=='drop-capital':
-                                                pass
-                                            else:
-                                                if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
-                                                    if dir_in_bin is not None:
-                                                        img_crop, img_crop_bin = \
-                                                            break_curved_line_into_small_pieces_and_then_merge(
-                                                                img_crop, mask_poly, img_crop_bin)
-                                                    else:
-                                                        img_crop, _ = \
-                                                            break_curved_line_into_small_pieces_and_then_merge(
-                                                                img_crop, mask_poly)
                                    
-                                    if not self.export_textline_images_and_text:
-                                        if w_scaled < 750:#1.5*image_width:
+                                    if w_scaled < 750:#1.5*image_width:
+                                        img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                            img_crop, image_height, image_width)
+                                        cropped_lines.append(img_fin)
+                                        if abs(better_des_slope) > 45:
+                                            cropped_lines_ver_index.append(1)
+                                        else:
+                                            cropped_lines_ver_index.append(0)
+                                            
+                                        cropped_lines_meging_indexing.append(0)
+                                        if dir_in_bin is not None:
                                            img_fin = preprocess_and_resize_image_for_ocrcnn_model(
-                                                img_crop, image_height, image_width)
+                                                img_crop_bin, image_height, image_width)
+                                            cropped_lines_bin.append(img_fin)
+                                    else:
+                                        splited_images, splited_images_bin = return_textlines_split_if_needed(
+                                            img_crop, img_crop_bin if dir_in_bin is not None else None)
+                                        if splited_images:
+                                            img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                splited_images[0], image_height, image_width)
                                            cropped_lines.append(img_fin)
+                                            cropped_lines_meging_indexing.append(1)
+                                            
                                            if abs(better_des_slope) > 45:
                                                cropped_lines_ver_index.append(1)
                                            else:
                                                cropped_lines_ver_index.append(0)
+                                            
+                                            img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                splited_images[1], image_height, image_width)
+                                            
+                                            cropped_lines.append(img_fin)
+                                            cropped_lines_meging_indexing.append(-1)
+                                            
+                                            if abs(better_des_slope) > 45:
+                                                cropped_lines_ver_index.append(1)
+                                            else:
+                                                cropped_lines_ver_index.append(0)
+                                            
+                                            if dir_in_bin is not None:
+                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                    splited_images_bin[0], image_height, image_width)
+                                                cropped_lines_bin.append(img_fin)
+                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                    splited_images_bin[1], image_height, image_width)
+                                                cropped_lines_bin.append(img_fin)
                                                
+                                        else:
+                                            img_fin = preprocess_and_resize_image_for_ocrcnn_model(
+                                                img_crop, image_height, image_width)
+                                            cropped_lines.append(img_fin)
                                            cropped_lines_meging_indexing.append(0)
+                                            
+                                            if abs(better_des_slope) > 45:
+                                                cropped_lines_ver_index.append(1)
+                                            else:
+                                                cropped_lines_ver_index.append(0)
+                                            
                                            if dir_in_bin is not None:
                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
                                                    img_crop_bin, image_height, image_width)
                                                cropped_lines_bin.append(img_fin)
-                                        else:
-                                            splited_images, splited_images_bin = return_textlines_split_if_needed(
-                                                img_crop, img_crop_bin if dir_in_bin is not None else None)
-                                            if splited_images:
-                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
-                                                    splited_images[0], image_height, image_width)
-                                                cropped_lines.append(img_fin)
-                                                cropped_lines_meging_indexing.append(1)
-                                                
-                                                if abs(better_des_slope) > 45:
-                                                    cropped_lines_ver_index.append(1)
-                                                else:
-                                                    cropped_lines_ver_index.append(0)
-                                                
-                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
-                                                    splited_images[1], image_height, image_width)
-                                                
-                                                cropped_lines.append(img_fin)
-                                                cropped_lines_meging_indexing.append(-1)
-                                                
-                                                if abs(better_des_slope) > 45:
-                                                    cropped_lines_ver_index.append(1)
-                                                else:
-                                                    cropped_lines_ver_index.append(0)
-                                                
-                                                if dir_in_bin is not None:
-                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(
-                                                        splited_images_bin[0], image_height, image_width)
-                                                    cropped_lines_bin.append(img_fin)
-                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(
-                                                        splited_images_bin[1], image_height, image_width)
-                                                    cropped_lines_bin.append(img_fin)
-                                                    
-                                            else:
-                                                img_fin = preprocess_and_resize_image_for_ocrcnn_model(
-                                                    img_crop, image_height, image_width)
-                                                cropped_lines.append(img_fin)
-                                                cropped_lines_meging_indexing.append(0)
-                                                
-                                                if abs(better_des_slope) > 45:
-                                                    cropped_lines_ver_index.append(1)
-                                                else:
-                                                    cropped_lines_ver_index.append(0)
-                                                
-                                                if dir_in_bin is not None:
-                                                    img_fin = preprocess_and_resize_image_for_ocrcnn_model(
-                                                        img_crop_bin, image_height, image_width)
-                                                    cropped_lines_bin.append(img_fin)
-                                        
-                                if self.export_textline_images_and_text:
-                                    if img_crop.shape[0]==0 or img_crop.shape[1]==0:
-                                        pass
-                                    else:
-                                        if child_textlines.tag.endswith("TextEquiv"):
-                                            for cheild_text in child_textlines:
-                                                if cheild_text.tag.endswith("Unicode"):
-                                                    textline_text = cheild_text.text
-                                                    if textline_text:
-                                                        base_name = os.path.join(
-                                                            dir_out, file_name + '_line_' + str(indexer_textlines))
-                                                        if self.pref_of_dataset:
-                                                            base_name += '_' + self.pref_of_dataset
-                                                        if not self.do_not_mask_with_textline_contour:
-                                                            base_name += '_masked'
-                                                            
-                                                        with open(base_name + '.txt', 'w') as text_file:
-                                                            text_file.write(textline_text)
-                                                        cv2.imwrite(base_name + '.png', img_crop)
-                                                    indexer_textlines+=1
-
-                    if not self.export_textline_images_and_text:
-                        indexer_text_region = indexer_text_region +1
-                    
-                if not self.export_textline_images_and_text:
-                    extracted_texts = []
-                    extracted_conf_value = []
-
-                    n_iterations  = math.ceil(len(cropped_lines) / self.b_s) 
-
-                    for i in range(n_iterations):
-                        if i==(n_iterations-1):
-                            n_start = i*self.b_s
-                            imgs = cropped_lines[n_start:]
-                            imgs = np.array(imgs)
-                            imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
-                            
-                            ver_imgs = np.array( cropped_lines_ver_index[n_start:] )
-                            indices_ver = np.where(ver_imgs == 1)[0]
-                            
-                            #print(indices_ver, 'indices_ver')
-                            if len(indices_ver)>0:
-                                imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
-                                imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
-                                #print(imgs_ver_flipped, 'imgs_ver_flipped')
-                                
-                            else:
-                                imgs_ver_flipped = None
-                            
-                            if dir_in_bin is not None:
-                                imgs_bin = cropped_lines_bin[n_start:]
-                                imgs_bin = np.array(imgs_bin)
-                                imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
-                                
-                                if len(indices_ver)>0:
-                                    imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
-                                    imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
-                                    #print(imgs_ver_flipped, 'imgs_ver_flipped')
                                    
-                                else:
-                                    imgs_bin_ver_flipped = None
-                        else:
-                            n_start = i*self.b_s
-                            n_end = (i+1)*self.b_s
-                            imgs = cropped_lines[n_start:n_end]
-                            imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)
+
+                    indexer_text_region = indexer_text_region +1
+                    
+                extracted_texts = []
+                extracted_conf_value = []
+
+                n_iterations  = math.ceil(len(cropped_lines) / self.b_s) 
+
+                for i in range(n_iterations):
+                    if i==(n_iterations-1):
+                        n_start = i*self.b_s
+                        imgs = cropped_lines[n_start:]
+                        imgs = np.array(imgs)
+                        imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
+                        
+                        ver_imgs = np.array( cropped_lines_ver_index[n_start:] )
+                        indices_ver = np.where(ver_imgs == 1)[0]
+                        
+                        #print(indices_ver, 'indices_ver')
+                        if len(indices_ver)>0:
+                            imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
+                            imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
+                            #print(imgs_ver_flipped, 'imgs_ver_flipped')
                            
-                            ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] )
-                            indices_ver = np.where(ver_imgs == 1)[0]
-                            #print(indices_ver, 'indices_ver')
+                        else:
+                            imgs_ver_flipped = None
+                        
+                        if dir_in_bin is not None:
+                            imgs_bin = cropped_lines_bin[n_start:]
+                            imgs_bin = np.array(imgs_bin)
+                            imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
                            
                            if len(indices_ver)>0:
-                                imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
-                                imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
+                                imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
+                                imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
                                #print(imgs_ver_flipped, 'imgs_ver_flipped')
+                                
                            else:
-                                imgs_ver_flipped = None
-
-                            
-                            if dir_in_bin is not None:
-                                imgs_bin = cropped_lines_bin[n_start:n_end]
-                                imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
-                                
-                                
-                                if len(indices_ver)>0:
-                                    imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
-                                    imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
-                                    #print(imgs_ver_flipped, 'imgs_ver_flipped')
-                                else:
-                                    imgs_bin_ver_flipped = None
-                            
-
-                        self.logger.debug("processing next %d lines", len(imgs))
-                        preds = self.model_zoo.get('ocr').predict(imgs, verbose=0)
+                                imgs_bin_ver_flipped = None
+                    else:
+                        n_start = i*self.b_s
+                        n_end = (i+1)*self.b_s
+                        imgs = cropped_lines[n_start:n_end]
+                        imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)
+                        
+                        ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] )
+                        indices_ver = np.where(ver_imgs == 1)[0]
+                        #print(indices_ver, 'indices_ver')
                        
                        if len(indices_ver)>0:
-                            preds_flipped = self.model_zoo.get('ocr').predict(imgs_ver_flipped, verbose=0)
+                            imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
+                            imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
+                            #print(imgs_ver_flipped, 'imgs_ver_flipped')
+                        else:
+                            imgs_ver_flipped = None
+
+                        
+                        if dir_in_bin is not None:
+                            imgs_bin = cropped_lines_bin[n_start:n_end]
+                            imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
+                            
+                            
+                            if len(indices_ver)>0:
+                                imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
+                                imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
+                                #print(imgs_ver_flipped, 'imgs_ver_flipped')
+                            else:
+                                imgs_bin_ver_flipped = None
+                        
+
+                    self.logger.debug("processing next %d lines", len(imgs))
+                    preds = self.model_zoo.get('ocr').predict(imgs, verbose=0)
+                    
+                    if len(indices_ver)>0:
+                        preds_flipped = self.model_zoo.get('ocr').predict(imgs_ver_flipped, verbose=0)
+                        preds_max_fliped = np.max(preds_flipped, axis=2 )
+                        preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
+                        pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
+                        masked_means_flipped = \
+                            np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \
+                            np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
+                        masked_means_flipped[np.isnan(masked_means_flipped)] = 0
+                        
+                        preds_max = np.max(preds, axis=2 )
+                        preds_max_args = np.argmax(preds, axis=2 )
+                        pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
+                        
+                        masked_means = \
+                            np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
+                            np.sum(pred_max_not_unk_mask_bool, axis=1)
+                        masked_means[np.isnan(masked_means)] = 0
+                        
+                        masked_means_ver = masked_means[indices_ver]
+                        #print(masked_means_ver, 'pred_max_not_unk')
+                        
+                        indices_where_flipped_conf_value_is_higher = \
+                            np.where(masked_means_flipped > masked_means_ver)[0]
+                        
+                        #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
+                        if len(indices_where_flipped_conf_value_is_higher)>0:
+                            indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
+                            preds[indices_to_be_replaced,:,:] = \
+                                preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
+                    if dir_in_bin is not None:
+                        preds_bin = self.model_zoo.get('ocr').predict(imgs_bin, verbose=0)
+                        
+                        if len(indices_ver)>0:
+                            preds_flipped = self.model_zoo.get('ocr').predict(imgs_bin_ver_flipped, verbose=0)
                            preds_max_fliped = np.max(preds_flipped, axis=2 )
                            preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
                            pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
@ -789,212 +774,179 @@ class Eynollah_ocr:
                            #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
                            if len(indices_where_flipped_conf_value_is_higher)>0:
                                indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
-                                preds[indices_to_be_replaced,:,:] = \
+                                preds_bin[indices_to_be_replaced,:,:] = \
                                    preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
-                        if dir_in_bin is not None:
-                            preds_bin = self.model_zoo.get('ocr').predict(imgs_bin, verbose=0)
-                            
-                            if len(indices_ver)>0:
-                                preds_flipped = self.model_zoo.get('ocr').predict(imgs_bin_ver_flipped, verbose=0)
-                                preds_max_fliped = np.max(preds_flipped, axis=2 )
-                                preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
-                                pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
-                                masked_means_flipped = \
-                                    np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \
-                                    np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
-                                masked_means_flipped[np.isnan(masked_means_flipped)] = 0
-                                
-                                preds_max = np.max(preds, axis=2 )
-                                preds_max_args = np.argmax(preds, axis=2 )
-                                pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
-                                
-                                masked_means = \
-                                    np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
-                                    np.sum(pred_max_not_unk_mask_bool, axis=1)
-                                masked_means[np.isnan(masked_means)] = 0
-                                
-                                masked_means_ver = masked_means[indices_ver]
-                                #print(masked_means_ver, 'pred_max_not_unk')
-                                
-                                indices_where_flipped_conf_value_is_higher = \
-                                    np.where(masked_means_flipped > masked_means_ver)[0]
-                                
-                                #print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
-                                if len(indices_where_flipped_conf_value_is_higher)>0:
-                                    indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
-                                    preds_bin[indices_to_be_replaced,:,:] = \
-                                        preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
-                            
-                            preds = (preds + preds_bin) / 2.
-
-                        pred_texts = decode_batch_predictions(preds, self.model_zoo.get('num_to_char'))
                        
-                        preds_max = np.max(preds, axis=2 )
-                        preds_max_args = np.argmax(preds, axis=2 )
-                        pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
-                        masked_means = \
-                            np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
-                            np.sum(pred_max_not_unk_mask_bool, axis=1)
+                        preds = (preds + preds_bin) / 2.

-                        for ib in range(imgs.shape[0]):
-                            pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
-                            if masked_means[ib] >= self.min_conf_value_of_textline_text:
-                                extracted_texts.append(pred_texts_ib)
-                                extracted_conf_value.append(masked_means[ib])
-                            else:
-                                extracted_texts.append("")
-                                extracted_conf_value.append(0)
-                    del cropped_lines
-                    if dir_in_bin is not None:
-                        del cropped_lines_bin
-                    gc.collect()
+                    pred_texts = decode_batch_predictions(preds, self.model_zoo.get('num_to_char'))
                    
-                    extracted_texts_merged = [extracted_texts[ind]
-                                              if cropped_lines_meging_indexing[ind]==0
-                                              else extracted_texts[ind]+" "+extracted_texts[ind+1]
-                                              if cropped_lines_meging_indexing[ind]==1
-                                              else None
-                                              for ind in range(len(cropped_lines_meging_indexing))]
-                    
-                    extracted_conf_value_merged = [extracted_conf_value[ind]
-                                                   if cropped_lines_meging_indexing[ind]==0
-                                                   else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2.
-                                                   if cropped_lines_meging_indexing[ind]==1
-                                                   else None
-                                                   for ind in range(len(cropped_lines_meging_indexing))]
+                    preds_max = np.max(preds, axis=2 )
+                    preds_max_args = np.argmax(preds, axis=2 )
+                    pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
+                    masked_means = \
+                        np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
+                        np.sum(pred_max_not_unk_mask_bool, axis=1)

-                    extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm]
-                                                   for ind_cfm in range(len(extracted_texts_merged))
-                                                   if extracted_texts_merged[ind_cfm] is not None]
-                    extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
-                    unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
-                    
-                    if dir_out_image_text:
-                        #font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
-                        font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
-                        with importlib_resources.as_file(font) as font:
-                            font = ImageFont.truetype(font=font, size=40)
-                        
-                        for indexer_text, bb_ind in enumerate(total_bb_coordinates):
-                            x_bb = bb_ind[0]
-                            y_bb = bb_ind[1]
-                            w_bb = bb_ind[2]
-                            h_bb = bb_ind[3]
-                            
-                            font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
-                                                        font.path, w_bb, int(h_bb*0.4) )
-                            
-                            ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
-                            
-                            text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
-                            text_width = text_bbox[2] - text_bbox[0]
-                            text_height = text_bbox[3] - text_bbox[1]
-
-                            text_x = x_bb + (w_bb - text_width) // 2  # Center horizontally
-                            text_y = y_bb + (h_bb - text_height) // 2  # Center vertically
-
-                            # Draw the text
-                            draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
-                        image_text.save(out_image_with_text)
-
-                    text_by_textregion = []
-                    for ind in unique_cropped_lines_region_indexer:
-                        ind = np.array(cropped_lines_region_indexer)==ind
-                        extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
-                        if len(extracted_texts_merged_un)>1:
-                            text_by_textregion_ind = ""
-                            next_glue = ""
-                            for indt in range(len(extracted_texts_merged_un)):
-                                if (extracted_texts_merged_un[indt].endswith('⸗') or
-                                    extracted_texts_merged_un[indt].endswith('-') or
-                                    extracted_texts_merged_un[indt].endswith('¬')):
-                                    text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
-                                    next_glue = ""
-                                else:
-                                    text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
-                                    next_glue = " "
-                            text_by_textregion.append(text_by_textregion_ind)
+                    for ib in range(imgs.shape[0]):
+                        pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
+                        if masked_means[ib] >= self.min_conf_value_of_textline_text:
+                            extracted_texts.append(pred_texts_ib)
+                            extracted_conf_value.append(masked_means[ib])
                        else:
-                            text_by_textregion.append(" ".join(extracted_texts_merged_un))
-                        #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion')
+                            extracted_texts.append("")
+                            extracted_conf_value.append(0)
+                del cropped_lines
+                if dir_in_bin is not None:
+                    del cropped_lines_bin
+                gc.collect()
+                
+                extracted_texts_merged = [extracted_texts[ind]
+                                            if cropped_lines_meging_indexing[ind]==0
+                                            else extracted_texts[ind]+" "+extracted_texts[ind+1]
+                                            if cropped_lines_meging_indexing[ind]==1
+                                            else None
+                                            for ind in range(len(cropped_lines_meging_indexing))]
+                
+                extracted_conf_value_merged = [extracted_conf_value[ind]
+                                                if cropped_lines_meging_indexing[ind]==0
+                                                else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2.
+                                                if cropped_lines_meging_indexing[ind]==1
+                                                else None
+                                                for ind in range(len(cropped_lines_meging_indexing))]

-                    ###index_tot_regions = []
-                    ###tot_region_ref = []
+                extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm]
+                                                for ind_cfm in range(len(extracted_texts_merged))
+                                                if extracted_texts_merged[ind_cfm] is not None]
+                extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
+                unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
+                
+                if dir_out_image_text:
+                    #font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
+                    font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
+                    with importlib_resources.as_file(font) as font:
+                        font = ImageFont.truetype(font=font, size=40)
+                    
+                    for indexer_text, bb_ind in enumerate(total_bb_coordinates):
+                        x_bb = bb_ind[0]
+                        y_bb = bb_ind[1]
+                        w_bb = bb_ind[2]
+                        h_bb = bb_ind[3]
+                        
+                        font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
+                                                    font.path, w_bb, int(h_bb*0.4) )
+                        
+                        ##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
+                        
+                        text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
+                        text_width = text_bbox[2] - text_bbox[0]
+                        text_height = text_bbox[3] - text_bbox[1]

-                    ###for jj in root1.iter(link+'RegionRefIndexed'):
-                        ###index_tot_regions.append(jj.attrib['index'])
-                        ###tot_region_ref.append(jj.attrib['regionRef'])
-                        
-                    ###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)}
-        
-                    #id_textregions = []
-                    #textregions_by_existing_ids = []
-                    indexer = 0
-                    indexer_textregion = 0
-                    for nn in root1.iter(region_tags):
-                        #id_textregion = nn.attrib['id']
-                        #id_textregions.append(id_textregion)
-                        #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
-                        
-                        is_textregion_text = False
-                        for childtest in nn:
-                            if childtest.tag.endswith("TextEquiv"):
-                                is_textregion_text = True
-                        
-                        if not is_textregion_text:
-                            text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
-                            unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
+                        text_x = x_bb + (w_bb - text_width) // 2  # Center horizontally
+                        text_y = y_bb + (h_bb - text_height) // 2  # Center vertically

-                        
-                        has_textline = False
-                        for child_textregion in nn:
-                            if child_textregion.tag.endswith("TextLine"):
-                                
-                                is_textline_text = False
-                                for childtest2 in child_textregion:
-                                    if childtest2.tag.endswith("TextEquiv"):
-                                        is_textline_text = True
-                                
-                                
-                                if not is_textline_text:
-                                    text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
-                                    text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
-                                    unicode_textline = ET.SubElement(text_subelement, 'Unicode')
-                                    unicode_textline.text = extracted_texts_merged[indexer]
-                                else:
-                                    for childtest3 in child_textregion:
-                                        if childtest3.tag.endswith("TextEquiv"):
-                                            for child_uc in childtest3:
-                                                if child_uc.tag.endswith("Unicode"):
-                                                    childtest3.set('conf',
-                                                                   f"{extracted_conf_value_merged[indexer]:.2f}")
-                                                    child_uc.text = extracted_texts_merged[indexer]
-                                        
-                                indexer = indexer + 1
-                                has_textline = True
-                        if has_textline:
-                            if is_textregion_text:
-                                for child4 in nn:
-                                    if child4.tag.endswith("TextEquiv"):
-                                        for childtr_uc in child4:
-                                            if childtr_uc.tag.endswith("Unicode"):
-                                                childtr_uc.text = text_by_textregion[indexer_textregion]
+                        # Draw the text
+                        draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
+                    image_text.save(out_image_with_text)
+
+                text_by_textregion = []
+                for ind in unique_cropped_lines_region_indexer:
+                    ind = np.array(cropped_lines_region_indexer)==ind
+                    extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
+                    if len(extracted_texts_merged_un)>1:
+                        text_by_textregion_ind = ""
+                        next_glue = ""
+                        for indt in range(len(extracted_texts_merged_un)):
+                            if (extracted_texts_merged_un[indt].endswith('⸗') or
+                                extracted_texts_merged_un[indt].endswith('-') or
+                                extracted_texts_merged_un[indt].endswith('¬')):
+                                text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
+                                next_glue = ""
                            else:
-                                unicode_textregion.text = text_by_textregion[indexer_textregion]
-                            indexer_textregion = indexer_textregion + 1
+                                text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
+                                next_glue = " "
+                        text_by_textregion.append(text_by_textregion_ind)
+                    else:
+                        text_by_textregion.append(" ".join(extracted_texts_merged_un))
+                    #print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion')
+
+                ###index_tot_regions = []
+                ###tot_region_ref = []
+
+                ###for jj in root1.iter(link+'RegionRefIndexed'):
+                    ###index_tot_regions.append(jj.attrib['index'])
+                    ###tot_region_ref.append(jj.attrib['regionRef'])
+                    
+                ###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)}
+    
+                #id_textregions = []
+                #textregions_by_existing_ids = []
+                indexer = 0
+                indexer_textregion = 0
+                for nn in root1.iter(region_tags):
+                    #id_textregion = nn.attrib['id']
+                    #id_textregions.append(id_textregion)
+                    #textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
+                    
+                    is_textregion_text = False
+                    for childtest in nn:
+                        if childtest.tag.endswith("TextEquiv"):
+                            is_textregion_text = True
+                    
+                    if not is_textregion_text:
+                        text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
+                        unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
+
+                    
+                    has_textline = False
+                    for child_textregion in nn:
+                        if child_textregion.tag.endswith("TextLine"):
                            
-                    ###sample_order  = [(id_to_order[tid], text)
-                    ###                 for tid, text in zip(id_textregions, textregions_by_existing_ids)
-                    ###                 if tid in id_to_order]
-                    
-                    ##ordered_texts_sample = [text for _, text in sorted(sample_order)]
-                    ##tot_page_text = ' '.join(ordered_texts_sample)
-                    
-                    ##for page_element in root1.iter(link+'Page'):
-                        ##text_page = ET.SubElement(page_element, 'TextEquiv')
-                        ##unicode_textpage = ET.SubElement(text_page, 'Unicode')
-                        ##unicode_textpage.text = tot_page_text
-                    
-                    ET.register_namespace("",name_space)
-                    tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
-                    #print("Job done in %.1fs", time.time() - t0)
+                            is_textline_text = False
+                            for childtest2 in child_textregion:
+                                if childtest2.tag.endswith("TextEquiv"):
+                                    is_textline_text = True
+                            
+                            
+                            if not is_textline_text:
+                                text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
+                                text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
+                                unicode_textline = ET.SubElement(text_subelement, 'Unicode')
+                                unicode_textline.text = extracted_texts_merged[indexer]
+                            else:
+                                for childtest3 in child_textregion:
+                                    if childtest3.tag.endswith("TextEquiv"):
+                                        for child_uc in childtest3:
+                                            if child_uc.tag.endswith("Unicode"):
+                                                childtest3.set('conf',
+                                                                f"{extracted_conf_value_merged[indexer]:.2f}")
+                                                child_uc.text = extracted_texts_merged[indexer]
+                                    
+                            indexer = indexer + 1
+                            has_textline = True
+                    if has_textline:
+                        if is_textregion_text:
+                            for child4 in nn:
+                                if child4.tag.endswith("TextEquiv"):
+                                    for childtr_uc in child4:
+                                        if childtr_uc.tag.endswith("Unicode"):
+                                            childtr_uc.text = text_by_textregion[indexer_textregion]
+                        else:
+                            unicode_textregion.text = text_by_textregion[indexer_textregion]
+                        indexer_textregion = indexer_textregion + 1
+                        
+                ###sample_order  = [(id_to_order[tid], text)
+                ###                 for tid, text in zip(id_textregions, textregions_by_existing_ids)
+                ###                 if tid in id_to_order]
+                
+                ##ordered_texts_sample = [text for _, text in sorted(sample_order)]
+                ##tot_page_text = ' '.join(ordered_texts_sample)
+                
+                ##for page_element in root1.iter(link+'Page'):
+                    ##text_page = ET.SubElement(page_element, 'TextEquiv')
+                    ##unicode_textpage = ET.SubElement(text_page, 'Unicode')
+                    ##unicode_textpage.text = tot_page_text
+                
+                ET.register_namespace("",name_space)
+                tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
+                #print("Job done in %.1fs", time.time() - t0)
--- a/src/eynollah/training/cli.py
+++ b/src/eynollah/training/cli.py
@ -8,6 +8,7 @@ from .build_model_load_pretrained_weights_and_save import build_model_load_pretr
 from .generate_gt_for_training import main as generate_gt_cli
 from .inference import main as inference_cli
 from .train import ex
+from .extract_line_gt import linegt_cli

@click.command(context_settings=dict(
        ignore_unknown_options=True,
@ -24,3 +25,4 @@ main.add_command(build_model_load_pretrained_weights_and_save)
 main.add_command(generate_gt_cli, 'generate-gt')
 main.add_command(inference_cli, 'inference')
 main.add_command(train_cli, 'train')
+main.add_command(linegt_cli, 'export_textline_images_and_text')
--- a/src/eynollah/training/extract_line_gt.py
+++ b/src/eynollah/training/extract_line_gt.py
@ -0,0 +1,136 @@
+from logging import Logger, getLogger
+from typing import Optional
+from pathlib import Path
+import os
+
+import click
+import cv2
+import xml.etree.ElementTree as ET
+import numpy as np
+
+from ..utils import is_image_filename
+
+@click.command()
+@click.option(
+    "--image",
+    "-i",
+    help="input image filename",
+    type=click.Path(exists=True, dir_okay=False),
+)
+@click.option(
+    "--dir_in",
+    "-di",
+    'image_filename',
+    help="directory of input images (instead of --image)",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--dir_xmls",
+    "-dx",
+    help="directory of input PAGE-XML files (in addition to --dir_in; filename stems must match the image files, with '.xml' suffix).",
+    type=click.Path(exists=True, file_okay=False),
+    required=True,
+)
+@click.option(
+    "--out",
+    "-o",
+    'dir_out',
+    help="directory for output PAGE-XML files",
+    type=click.Path(exists=True, file_okay=False),
+    required=True,
+)
+@click.option(
+    "--dataset_abbrevation",
+    "-ds_pref",
+    'pref_of_dataset',
+    help="in the case of extracting textline and text from a xml GT file user can add an abbrevation of dataset name to generated dataset",
+)
+@click.option(
+    "--do_not_mask_with_textline_contour",
+    "-nmtc/-mtc",
+    is_flag=True,
+    help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
+)
+def linegt_cli(
+    image_filename,
+    dir_in,
+    dir_xmls,
+    dir_out,
+    pref_of_dataset,
+    do_not_mask_with_textline_contour,
+):
+    assert bool(dir_in) ^ bool(image_filename), "Set --dir-in or --image-filename, not both"
+    if dir_in:
+        ls_imgs = [
+            os.path.join(dir_in, image_filename) for image_filename in filter(is_image_filename, os.listdir(dir_in))
+        ]
+    else:
+        assert image_filename
+        ls_imgs = [image_filename]
+
+    for dir_img in ls_imgs:
+        file_name = Path(dir_img).stem
+        dir_xml = os.path.join(dir_xmls, file_name + '.xml')
+
+        img = cv2.imread(dir_img)
+
+        total_bb_coordinates = []
+
+        tree1 = ET.parse(dir_xml, parser=ET.XMLParser(encoding="utf-8"))
+        root1 = tree1.getroot()
+        alltags = [elem.tag for elem in root1.iter()]
+
+        name_space = alltags[0].split('}')[0]
+        name_space = name_space.split('{')[1]
+
+        region_tags = [x for x in alltags if x.endswith('TextRegion')][0]
+
+        cropped_lines_region_indexer = []
+
+        indexer_text_region = 0
+        indexer_textlines = 0
+        # FIXME: non recursive, use OCR-D PAGE generateDS API. Or use an existing tool for this purpose altogether
+        for nn in root1.iter(region_tags):
+            for child_textregion in nn:
+                if child_textregion.tag.endswith("TextLine"):
+                    for child_textlines in child_textregion:
+                        if child_textlines.tag.endswith("Coords"):
+                            cropped_lines_region_indexer.append(indexer_text_region)
+                            p_h = child_textlines.attrib['points'].split(' ')
+                            textline_coords = np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])
+
+                            x, y, w, h = cv2.boundingRect(textline_coords)
+
+                            total_bb_coordinates.append([x, y, w, h])
+
+                            img_poly_on_img = np.copy(img)
+
+                            mask_poly = np.zeros(img.shape)
+                            mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1))
+
+                            mask_poly = mask_poly[y : y + h, x : x + w, :]
+                            img_crop = img_poly_on_img[y : y + h, x : x + w, :]
+
+                            if not do_not_mask_with_textline_contour:
+                                img_crop[mask_poly == 0] = 255
+
+                            if img_crop.shape[0] == 0 or img_crop.shape[1] == 0:
+                                continue
+
+                            if child_textlines.tag.endswith("TextEquiv"):
+                                for cheild_text in child_textlines:
+                                    if cheild_text.tag.endswith("Unicode"):
+                                        textline_text = cheild_text.text
+                                        if textline_text:
+                                            base_name = os.path.join(
+                                                dir_out, file_name + '_line_' + str(indexer_textlines)
+                                            )
+                                            if pref_of_dataset:
+                                                base_name += '_' + pref_of_dataset
+                                            if not do_not_mask_with_textline_contour:
+                                                base_name += '_masked'
+
+                                            with open(base_name + '.txt', 'w') as text_file:
+                                                text_file.write(textline_text)
+                                            cv2.imwrite(base_name + '.png', img_crop)
+                                        indexer_textlines += 1