From 27c4b0d0e09ff9d7dabe31074f225adedb3ee5d1 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Sun, 25 May 2025 01:12:58 +0200
Subject: [PATCH] Drop capitals are written separately and are not attached to
 their corresponding text line. The OCR use case also supports single-image
 input.

---
 src/eynollah/cli.py      | 11 ++++++++--
 src/eynollah/eynollah.py | 46 +++++++++++++++++++++++++++++-----------
 src/eynollah/writer.py   |  8 +++----
 3 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py
index cd56833..0c18b2c 100644
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@@ -331,6 +331,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
 
 
 @main.command()
+@click.option(
+    "--image",
+    "-i",
+    help="image filename",
+    type=click.Path(exists=True, dir_okay=False),
+)
 @click.option(
     "--dir_in",
     "-di",
@@ -415,7 +421,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
 
-def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
+def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
     initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
@@ -426,8 +432,9 @@ def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, ex
     assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
     assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text -etit can not be set alongside draw text on image -dtoi"
     assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb"
-    
+    assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both."
     eynollah_ocr = Eynollah_ocr(
+        image_filename=image,
         dir_xmls=dir_xmls,
         dir_out_image_text=dir_out_image_text,
         dir_in=dir_in,
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 1b50713..aa38274 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -5134,10 +5134,10 @@ class Eynollah:
 
             pixel_img = 4
             polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
-            all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
-                text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
-                all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
-                kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)
+            ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
+                ##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
+                ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
+                ##kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)
 
             if not self.reading_order_machine_based:
                 pixel_seps = 6
@@ -5299,6 +5299,7 @@ class Eynollah_ocr:
         dir_models,
         dir_xmls=None,
         dir_in=None,
+        image_filename=None,
         dir_in_bin=None,
         dir_out=None,
         dir_out_image_text=None,
@@ -5312,6 +5313,7 @@ class Eynollah_ocr:
         logger=None,
     ):
         self.dir_in = dir_in
+        self.image_filename = image_filename
         self.dir_in_bin = dir_in_bin
         self.dir_out = dir_out
         self.dir_xmls = dir_xmls
@@ -5363,13 +5365,20 @@ class Eynollah_ocr:
             )
 
     def run(self):
-        ls_imgs = os.listdir(self.dir_in)
+        if self.dir_in:
+            ls_imgs = os.listdir(self.dir_in)
+        else:
+            ls_imgs = [self.image_filename]
 
         if self.tr_ocr:
             tr_ocr_input_height_and_width = 384
             for ind_img in ls_imgs:
-                file_name = Path(ind_img).stem
-                dir_img = os.path.join(self.dir_in, ind_img)
+                if self.dir_in:
+                    file_name = Path(ind_img).stem
+                    dir_img = os.path.join(self.dir_in, ind_img)
+                else:
+                    file_name = Path(self.image_filename).stem
+                    dir_img = self.image_filename
                 dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
                 out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
                 img = cv2.imread(dir_img)
@@ -5541,8 +5550,15 @@ class Eynollah_ocr:
             img_size=(image_width, image_height)
 
             for ind_img in ls_imgs:
-                file_name = Path(ind_img).stem
-                dir_img = os.path.join(self.dir_in, ind_img)
+                if self.dir_in:
+                    file_name = Path(ind_img).stem
+                    dir_img = os.path.join(self.dir_in, ind_img)
+                else:
+                    file_name = Path(self.image_filename).stem
+                    dir_img = self.image_filename
+
+                #file_name = Path(ind_img).stem
+                #dir_img = os.path.join(self.dir_in, ind_img)
                 dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
                 out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
                 img = cv2.imread(dir_img)
@@ -5576,6 +5592,7 @@ class Eynollah_ocr:
                 indexer_text_region = 0
                 indexer_textlines = 0
                 for nn in root1.iter(region_tags):
+                    type_textregion = nn.attrib['type']
                     for child_textregion in nn:
                         if child_textregion.tag.endswith("TextLine"):
                             for child_textlines in child_textregion:
@@ -5589,7 +5606,9 @@ class Eynollah_ocr:
                                 angle_radians = math.atan2(h, w)
                                 # Convert to degrees
                                 angle_degrees = math.degrees(angle_radians)
-
+                                if type_textregion=='drop-capital':
+                                    angle_degrees = 0
+
                                 if self.draw_texts_on_image:
                                     total_bb_coordinates.append([x,y,w,h])
 
@@ -5632,8 +5651,11 @@ class Eynollah_ocr:
                                         #print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii')
                                     else:
                                         img_crop[mask_poly==0] = 255
-                                        if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100:
-                                            img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
+                                        if type_textregion=='drop-capital':
+                                            pass
+                                        else:
+                                            if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100:
+                                                img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
 
 
 
diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py
index cf0551b..f07abf6 100644
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@@ -283,14 +283,14 @@ class EynollahXmlWriter():
                                       Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord)))
             page.add_TextRegion(marginal)
             self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
-            
+
         for mm in range(len(found_polygons_drop_capitals)):
             dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital',
                                          Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))
             page.add_TextRegion(dropcapital)
-            ###all_box_coord_drop = None
-            ###slopes_drop = None
-            ###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
+            all_box_coord_drop = None
+            slopes_drop = None
+            self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
 
         for mm in range(len(found_polygons_text_region_img)):
             page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
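
Usage sketch for the single-image OCR mode introduced above. This is a minimal sketch, not part of the patch: the import path and the placeholder paths are assumptions; only Eynollah_ocr, image_filename, dir_models, dir_xmls, and dir_out appear in the diff itself. run() simply wraps the single file in a one-element ls_imgs list, so the per-image pipeline is unchanged. On the command line the same mode is reached through the new -i/--image option, which the added assert makes mutually exclusive with -di/--dir_in.

    # Minimal sketch (assumed import path and placeholder paths), OCR on one page image.
    from eynollah.eynollah import Eynollah_ocr

    eynollah_ocr = Eynollah_ocr(
        dir_models="models_ocr",         # placeholder: directory holding the OCR model
        image_filename="page_0001.png",  # new single-image input, used instead of dir_in
        dir_xmls="xmls",                 # expects the layout PAGE-XML as xmls/page_0001.xml
        dir_out="out",                   # OCR-enriched PAGE-XML is written here
    )
    eynollah_ocr.run()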