From 27c4b0d0e09ff9d7dabe31074f225adedb3ee5d1 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Sun, 25 May 2025 01:12:58 +0200
Subject: [PATCH] Drop capitals are written separately and are not attached to
 their corresponding text line. The OCR use case also supports single-image
 input.

---
 src/eynollah/cli.py      | 11 ++++++++--
 src/eynollah/eynollah.py | 46 +++++++++++++++++++++++++++++-----------
 src/eynollah/writer.py   |  8 +++----
 3 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py
index cd56833..0c18b2c 100644
--- a/src/eynollah/cli.py
+++ b/src/eynollah/cli.py
@@ -331,6 +331,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
 
 
 @main.command()
+@click.option(
+    "--image",
+    "-i",
+    help="image filename",
+    type=click.Path(exists=True, dir_okay=False),
+)
 @click.option(
     "--dir_in",
     "-di",
@@ -415,7 +421,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
 
-def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
+def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
     initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
@@ -426,8 +432,9 @@ def ocr(dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, ex
     assert not export_textline_images_and_text or not dir_out_image_text, "Exporting textline and text -etit can not be set alongside directory of images with predicted text -doit"
     assert not export_textline_images_and_text or not draw_texts_on_image, "Exporting textline and text -etit can not be set alongside draw text on image -dtoi"
     assert not export_textline_images_and_text or not prediction_with_both_of_rgb_and_bin, "Exporting textline and text -etit can not be set alongside prediction with both rgb and bin -brb"
-    
+    assert (bool(image) ^ bool(dir_in)), "Either -i (single image) or -di (directory) must be provided, but not both."
     eynollah_ocr = Eynollah_ocr(
+        image_filename=image,
         dir_xmls=dir_xmls,
         dir_out_image_text=dir_out_image_text,
         dir_in=dir_in,
diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py
index 1b50713..aa38274 100644
--- a/src/eynollah/eynollah.py
+++ b/src/eynollah/eynollah.py
@@ -5134,10 +5134,10 @@ class Eynollah:
 
             pixel_img = 4
             polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
-            all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
-                text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
-                all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
-                kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)
+            ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
+                ##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h,
+                ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
+                ##kernel=KERNEL, curved_line=self.curved_line, textline_light=self.textline_light)
 
             if not self.reading_order_machine_based:
                 pixel_seps = 6
@@ -5299,6 +5299,7 @@ class Eynollah_ocr:
         dir_models,
         dir_xmls=None,
         dir_in=None,
+        image_filename=None,
         dir_in_bin=None,
         dir_out=None,
         dir_out_image_text=None,
@@ -5312,6 +5313,7 @@ class Eynollah_ocr:
         logger=None,
     ):
         self.dir_in = dir_in
+        self.image_filename = image_filename
         self.dir_in_bin = dir_in_bin
         self.dir_out = dir_out
         self.dir_xmls = dir_xmls
@@ -5363,13 +5365,20 @@ class Eynollah_ocr:
             )
 
     def run(self):
-        ls_imgs = os.listdir(self.dir_in)
+        if self.dir_in:
+            ls_imgs = os.listdir(self.dir_in)
+        else:
+            ls_imgs = [self.image_filename]
 
         if self.tr_ocr:
             tr_ocr_input_height_and_width = 384
             for ind_img in ls_imgs:
-                file_name = Path(ind_img).stem
-                dir_img = os.path.join(self.dir_in, ind_img)
+                if self.dir_in:
+                    file_name = Path(ind_img).stem
+                    dir_img = os.path.join(self.dir_in, ind_img)
+                else:
+                    file_name = Path(self.image_filename).stem
+                    dir_img = self.image_filename
                 dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
                 out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
                 img = cv2.imread(dir_img)
@@ -5541,8 +5550,15 @@ class Eynollah_ocr:
             img_size=(image_width, image_height)
 
             for ind_img in ls_imgs:
-                file_name = Path(ind_img).stem
-                dir_img = os.path.join(self.dir_in, ind_img)
+                if self.dir_in:
+                    file_name = Path(ind_img).stem
+                    dir_img = os.path.join(self.dir_in, ind_img)
+                else:
+                    file_name = Path(self.image_filename).stem
+                    dir_img = self.image_filename
+
+                #file_name = Path(ind_img).stem
+                #dir_img = os.path.join(self.dir_in, ind_img)
                 dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
                 out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
                 img = cv2.imread(dir_img)
@@ -5576,6 +5592,7 @@ class Eynollah_ocr:
                 indexer_text_region = 0
                 indexer_textlines = 0
                 for nn in root1.iter(region_tags):
+                    type_textregion = nn.attrib['type']
                     for child_textregion in nn:
                         if child_textregion.tag.endswith("TextLine"):
                             for child_textlines in child_textregion:
@@ -5589,7 +5606,9 @@ class Eynollah_ocr:
                                 angle_radians = math.atan2(h, w)
                                 # Convert to degrees
                                 angle_degrees = math.degrees(angle_radians)
-
+                                if type_textregion=='drop-capital':
+                                    angle_degrees = 0
+
                                 if self.draw_texts_on_image:
                                     total_bb_coordinates.append([x,y,w,h])
 
@@ -5632,8 +5651,11 @@ class Eynollah_ocr:
                                         #print(file_name,w_n*h_n , mask_poly[:,:,0].sum(), mask_poly[:,:,0].sum() /float(w_n*h_n) , 'ikiiiiii')
                                     else:
                                         img_crop[mask_poly==0] = 255
-                                        if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100:
-                                            img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
+                                        if type_textregion=='drop-capital':
+                                            pass
+                                        else:
+                                            if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 100:
+                                                img_crop = break_curved_line_into_small_pieces_and_then_merge(img_crop, mask_poly)
 
 
 
diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py
index cf0551b..f07abf6 100644
--- a/src/eynollah/writer.py
+++ b/src/eynollah/writer.py
@@ -283,14 +283,14 @@ class EynollahXmlWriter():
                                       Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord)))
             page.add_TextRegion(marginal)
             self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
-            
+
         for mm in range(len(found_polygons_drop_capitals)):
             dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital',
                                          Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))
             page.add_TextRegion(dropcapital)
-            ###all_box_coord_drop = None
-            ###slopes_drop = None
-            ###self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
+            all_box_coord_drop = None
+            slopes_drop = None
+            self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
 
         for mm in range(len(found_polygons_text_region_img)):
             page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
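
Usage sketch for the single-image OCR mode introduced above. This is a minimal sketch, not part of the patch: the import path and the placeholder paths are assumptions; only Eynollah_ocr, image_filename, dir_models, dir_xmls, and dir_out appear in the diff itself. run() simply wraps the single file in a one-element ls_imgs list, so the per-image pipeline is unchanged. On the command line the same mode is reached through the new -i/--image option, which the added assert makes mutually exclusive with -di/--dir_in.

    # Minimal sketch (assumed import path and placeholder paths), OCR on one page image.
    from eynollah.eynollah import Eynollah_ocr

    eynollah_ocr = Eynollah_ocr(
        dir_models="models_ocr",         # placeholder: directory holding the OCR model
        image_filename="page_0001.png",  # new single-image input, used instead of dir_in
        dir_xmls="xmls",                 # expects the layout PAGE-XML as xmls/page_0001.xml
        dir_out="out",                   # OCR-enriched PAGE-XML is written here
    )
    eynollah_ocr.run()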