diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py
index 67fd57e..9dc326d 100644
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@@ -456,6 +456,11 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="directory of models",
     type=click.Path(exists=True, file_okay=False),
 )
+@click.option(
+    "--model_name",
+    help="Specific model file path to use for OCR",
+    type=click.Path(exists=True, file_okay=False),
+)
 @click.option(
     "--tr_ocr",
     "-trocr/-notrocr",
@@ -474,12 +479,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     is_flag=True,
     help="if this parameter set to true, cropped textline images will not be masked with textline contour.",
 )
-@click.option(
-    "--draw_texts_on_image",
-    "-dtoi/-ndtoi",
-    is_flag=True,
-    help="if this parameter set to true, the predicted texts will be displayed on an image.",
-)
 @click.option(
     "--prediction_with_both_of_rgb_and_bin",
     "-brb/-nbrb",
@@ -508,16 +507,17 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
 
-def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
+def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, model_name, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, min_conf_value_of_textline_text, log_level):
     initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
+
+    assert not model or not model_name, "model directory -m can not be set alongside specific model name --model_name"
     assert not export_textline_images_and_text or not tr_ocr, "Exporting textline and text -etit can not be set alongside transformer ocr -tr_ocr"
     assert not export_textline_images_and_text or not model, "Exporting textline and text -etit can not be set alongside model -m"
     assert not export_textline_images_and_text or not batch_size, "Exporting textline and text -etit can not be set alongside batch size -bs"
     assert not export_textline_images_and_text or not dir_in_bin, "Exporting textline and text -etit can not be set alongside directory of bin images -dib"
     assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
-    assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text -etit can not be set alongside draw text on image -dtoi"
     assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb"
     assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both."
     eynollah_ocr = Eynollah_ocr(
@@ -528,10 +528,10 @@ def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text,
         dir_in_bin=dir_in_bin,
         dir_out=out,
         dir_models=model,
+        model_name=model_name,
         tr_ocr=tr_ocr,
         export_textline_images_and_text=export_textline_images_and_text,
         do_not_mask_with_textline_contour=do_not_mask_with_textline_contour,
-        draw_texts_on_image=draw_texts_on_image,
         prediction_with_both_of_rgb_and_bin=prediction_with_both_of_rgb_and_bin,
         batch_size=batch_size,
         pref_of_dataset=dataset_abbrevation,
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 30e180d..ec2900f 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -5171,6 +5171,7 @@ class Eynollah_ocr:
     def __init__(
         self,
         dir_models,
+        model_name=None,
         dir_xmls=None,
         dir_in=None,
         image_filename=None,
@@ -5181,7 +5182,6 @@ class Eynollah_ocr:
         batch_size=None,
         export_textline_images_and_text=False,
         do_not_mask_with_textline_contour=False,
-        draw_texts_on_image=False,
         prediction_with_both_of_rgb_and_bin=False,
         pref_of_dataset=None,
         min_conf_value_of_textline_text : Optional[float]=None,
@@ -5193,10 +5193,10 @@ class Eynollah_ocr:
         self.dir_out = dir_out
         self.dir_xmls = dir_xmls
         self.dir_models = dir_models
+        self.model_name = model_name
         self.tr_ocr = tr_ocr
         self.export_textline_images_and_text = export_textline_images_and_text
         self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
-        self.draw_texts_on_image = draw_texts_on_image
         self.dir_out_image_text = dir_out_image_text
         self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
         self.pref_of_dataset = pref_of_dataset
@@ -5210,7 +5210,10 @@ class Eynollah_ocr:
         if tr_ocr:
             self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
             self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-            self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
+            if self.model_name:
+                self.model_ocr_dir = self.model_name
+            else:
+                self.model_ocr_dir = dir_models + "/trocr_model_ens_of_3_checkpoints_201124"
             self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir)
             self.model_ocr.to(self.device)
             if not batch_size:
@@ -5219,7 +5222,10 @@ class Eynollah_ocr:
                 self.b_s = int(batch_size)
 
         else:
-            self.model_ocr_dir = dir_models + "/model_step_45000_ocr"#"/model_eynollah_ocr_cnnrnn_20250805"#
+            if self.model_name:
+                self.model_ocr_dir = self.model_name
+            else:
+                self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250805"
             model_ocr = load_model(self.model_ocr_dir , compile=False)
 
             self.prediction_model = tf.keras.models.Model(
@@ -5230,7 +5236,7 @@ class Eynollah_ocr:
             else:
                 self.b_s = int(batch_size)
 
-        with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file:
+        with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file:
             characters = json.load(config_file)
 
         AUTOTUNE = tf.data.AUTOTUNE
@@ -5271,7 +5277,7 @@ class Eynollah_ocr:
 
                 img = cv2.imread(dir_img)
 
-                if self.draw_texts_on_image:
+                if self.dir_out_image_text:
                     out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
                     image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
                     draw = ImageDraw.Draw(image_text)
@@ -5306,7 +5312,7 @@ class Eynollah_ocr:
                         textline_coords = np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] )
                         x,y,w,h = cv2.boundingRect(textline_coords)
 
-                        if self.draw_texts_on_image:
+                        if self.dir_out_image_text:
                             total_bb_coordinates.append([x,y,w,h])
 
                         h2w_ratio = h/float(w)
@@ -5363,7 +5369,7 @@ class Eynollah_ocr:
 
             unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
 
-            if self.draw_texts_on_image:
+            if self.dir_out_image_text:
                 font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
                 font = ImageFont.truetype(font_path, 40)
 
@@ -5463,7 +5469,7 @@ class Eynollah_ocr:
                     dir_img_bin = os.path.join(self.dir_in_bin, file_name+'.png')
                     img_bin = cv2.imread(dir_img_bin)
 
-                if self.draw_texts_on_image:
+                if self.dir_out_image_text:
                     out_image_with_text = os.path.join(self.dir_out_image_text, file_name+'.png')
                     image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
                     draw = ImageDraw.Draw(image_text)
@@ -5508,7 +5514,7 @@ class Eynollah_ocr:
                         if type_textregion=='drop-capital':
                             angle_degrees = 0
 
-                        if self.draw_texts_on_image:
+                        if self.dir_out_image_text:
                             total_bb_coordinates.append([x,y,w,h])
 
                         w_scaled = w * image_height/float(h)
@@ -5829,7 +5835,7 @@ class Eynollah_ocr:
 
             unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
 
-            if self.draw_texts_on_image:
+            if self.dir_out_image_text:
                 font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
                 font = ImageFont.truetype(font_path, 40)
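
Note: a minimal usage sketch of what this patch changes, under stated assumptions. Eynollah_ocr, model_name, dir_models, and dir_out_image_text come from this diff; every path below is hypothetical, and the trailing run() call is assumed from the usual CLI pattern (its signature is not shown in this diff).

    from eynollah.eynollah import Eynollah_ocr

    # Per the new assert in cli.py, the models directory (-m) and --model_name
    # are mutually exclusive: pass exactly one of them.
    eynollah_ocr = Eynollah_ocr(
        dir_models=None,                        # unset, because model_name is given
        model_name="/models/my_finetuned_ocr",  # hypothetical path to one specific saved model;
                                                # overrides the default model inside dir_models
        dir_in="pages/",                        # hypothetical input image directory
        dir_xmls="page_xml/",                   # hypothetical PAGE-XML directory
        dir_out="ocr_out/",                     # hypothetical output directory
        dir_out_image_text="ocr_out_images/",   # with draw_texts_on_image removed, setting this
                                                # directory is what now triggers text rendering
    )
    eynollah_ocr.run()                          # method name assumed; not part of this diff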