eynollah ocr: support either a specific model path (--model_name) or a models directory (-m, which selects the default model)

2025-12-16 08:04:14 +01:00 · 2025-08-28 11:30:59 +02:00 · 2025-08-28 11:30:59 +02:00 · fdcae8dd6e
commit fdcae8dd6e
parent 7dd281267d
2 changed files with 26 additions and 20 deletions
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@ -456,6 +456,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
    help="directory of models",
    type=click.Path(exists=True, file_okay=False),
 )
@click.option(
    "--model_name",
    help="Specific model file path to use for OCR",
    type=click.Path(exists=True, file_okay=False),
 )
@click.option(
    "--tr_ocr",
    "-trocr/-notrocr",
@ -474,12 +479,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
    is_flag=True,
    help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
 )
@click.option(
    "--draw_texts_on_image",
    "-dtoi/-ndtoi",
    is_flag=True,
    help="if this parameter set to true, the predicted texts will be displayed on an image.",
 )
@click.option(
    "--prediction_with_both_of_rgb_and_bin",
    "-brb/-nbrb",
@ -508,16 +507,17 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
    help="Override log level globally to this",
 )
-def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
+def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
    initLogging()
    if log_level:
        getLogger('eynollah').setLevel(getLevelName(log_level))
    assert not model or not model_name, "model directory  -m can not be set alongside specific model name --model_name"
    assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text  -etit can not be set alongside transformer ocr -tr_ocr"
    assert not export_textline_images_and_text or not model, "Exporting textline and text  -etit can not be set alongside model -m"
    assert not export_textline_images_and_text or not batch_size, "Exporting textline and text  -etit can not be set alongside batch size -bs"
    assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text  -etit can not be set alongside directory of bin images -dib"
    assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text  -etit can not be set alongside directory of images with predicted text -doit"
    assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text  -etit can not be set alongside draw text on image -dtoi"
    assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text  -etit can not be set alongside prediction with both rgb and bin -brb"
    assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both."
    eynollah_ocr = Eynollah_ocr(
@ -528,10 +528,10 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text,
        dir_in_bin=dir_in_bin,
        dir_out=out,
        dir_models=model,
        model_name=model_name,
        tr_ocr=tr_ocr,
        export_textline_images_and_text=export_textline_images_and_text,
        do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
        draw_texts_on_image=draw_texts_on_image,
        prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
        batch_size=batch_size,
        pref_of_dataset=dataset_abbrevation,
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@ -5171,6 +5171,7 @@ class Eynollah_ocr:
    def __init__(
        self,
        dir_models,
        model_name=None,
        dir_xmls=None,
        dir_in=None,
        image_filename=None,
@ -5181,7 +5182,6 @@ class Eynollah_ocr:
        batch_size=None,
        export_textline_images_and_text=False,
        do_not_mask_with_textline_contour=False,
        draw_texts_on_image=False,
        prediction_with_both_of_rgb_and_bin=False,
        pref_of_dataset=None,
        min_conf_value_of_textline_text : Optional[float]=None,
@ -5193,10 +5193,10 @@ class Eynollah_ocr:
        self.dir_out = dir_out
        self.dir_xmls = dir_xmls
        self.dir_models = dir_models
        self.model_name = model_name
        self.tr_ocr = tr_ocr
        self.export_textline_images_and_text = export_textline_images_and_text
        self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
        self.draw_texts_on_image = draw_texts_on_image
        self.dir_out_image_text = dir_out_image_text
        self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
        self.pref_of_dataset = pref_of_dataset
@ -5210,7 +5210,10 @@ class Eynollah_ocr:
            if tr_ocr:
                self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
                self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-                self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
+                if self.model_name:
                    self.model_ocr_dir = self.model_name
                else:
                    self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
                self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
                self.model_ocr.to(self.device)
                if not batch_size:
@ -5219,7 +5222,10 @@ class Eynollah_ocr:
                    self.b_s = int(batch_size)
            else:
-                self.model_ocr_dir = dir_models + "/model_step_45000_ocr"#"/model_eynollah_ocr_cnnrnn_20250805"#
+                if self.model_name:
                    self.model_ocr_dir = self.model_name
                else:
                    self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250805"
                model_ocr = load_model(self.model_ocr_dir , compile=False)
                self.prediction_model = tf.keras.models.Model(
@ -5230,7 +5236,7 @@ class Eynollah_ocr:
                else:
                    self.b_s = int(batch_size)
-                with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file:
+                with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
                    characters = json.load(config_file)
                AUTOTUNE = tf.data.AUTOTUNE
@ -5271,7 +5277,7 @@ class Eynollah_ocr:
                img = cv2.imread(dir_img)
-                if self.draw_texts_on_image:
+                if self.dir_out_image_text:
                    out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
                    image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
                    draw = ImageDraw.Draw(image_text)
@ -5306,7 +5312,7 @@ class Eynollah_ocr:
                                    textline_coords =  np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ]  for x in p_h] )
                                    x,y,w,h = cv2.boundingRect(textline_coords)
-                                    if self.draw_texts_on_image:
+                                    if self.dir_out_image_text:
                                        total_bb_coordinates.append([x,y,w,h])
                                    h2w_ratio = h/float(w)
@ -5363,7 +5369,7 @@ class Eynollah_ocr:
                unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
-                if self.draw_texts_on_image:
+                if self.dir_out_image_text:
                    font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
                    font = ImageFont.truetype(font_path, 40)
@ -5463,7 +5469,7 @@ class Eynollah_ocr:
                    dir_img_bin = os.path.join(self.dir_in_bin, file_name+'.png')
                    img_bin = cv2.imread(dir_img_bin)
-                if self.draw_texts_on_image:
+                if self.dir_out_image_text:
                    out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
                    image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
                    draw = ImageDraw.Draw(image_text)
@ -5508,7 +5514,7 @@ class Eynollah_ocr:
                                    if type_textregion=='drop-capital':
                                        angle_degrees = 0
-                                    if self.draw_texts_on_image:
+                                    if self.dir_out_image_text:
                                        total_bb_coordinates.append([x,y,w,h])
                                    w_scaled = w *  image_height/float(h)
@ -5829,7 +5835,7 @@ class Eynollah_ocr:
                    unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
-                    if self.draw_texts_on_image:
+                    if self.dir_out_image_text:
                        font_path = "Charis-7.000/Charis-Regular.ttf"  # Make sure this file exists!
                        font = ImageFont.truetype(font_path, 40)