From caf0fbe90f0eea8725a3f245db48256a399bd0aa Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 25 Mar 2025 22:32:59 +0100 Subject: [PATCH] pep8-e302: two blank lines between functions --- src/eynollah/cli.py | 12 ++++++++---- src/eynollah/eynollah.py | 1 + src/eynollah/utils/__init__.py | 12 ++++++++++++ src/eynollah/utils/contour.py | 19 +++++++++++++++++++ src/eynollah/utils/counter.py | 1 + src/eynollah/utils/drop_capitals.py | 2 ++ src/eynollah/utils/rotate.py | 9 +++++++++ src/eynollah/utils/separate_lines.py | 13 +++++++++++++ src/eynollah/writer.py | 1 + tests/test_dpi.py | 2 ++ tests/test_run.py | 2 ++ tests/test_xml.py | 2 ++ 12 files changed, 72 insertions(+), 4 deletions(-) diff --git a/src/eynollah/cli.py b/src/eynollah/cli.py index 7f3c230..f07c0a0 100644 --- a/src/eynollah/cli.py +++ b/src/eynollah/cli.py @@ -4,10 +4,12 @@ from ocrd_utils import initLogging, setOverrideLogLevel from eynollah.eynollah import Eynollah, Eynollah_ocr from eynollah.sbb_binarize import SbbBinarizer + @click.group() def main(): pass + @main.command() @click.option( "--dir_xml", @@ -49,6 +51,7 @@ def main(): def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): xml_files_ind = os.listdir(dir_xml) + @main.command() @click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') @@ -78,10 +81,10 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out) print("Error: You used -do to write out binarized images but have not set -di") sys.exit(1) SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out) - - - - + + + + @main.command() @click.option( "--image", @@ -381,5 +384,6 @@ def ocr(dir_in, out, dir_xmls, model, tr_ocr, export_textline_images_and_text, d ) eynollah_ocr.run() + if __name__ == "__main__": main() diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 0256128..612303a 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -163,6 +163,7 @@ class PatchEncoder(layers.Layer): }) return config + class Eynollah: def __init__( self, diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 8a89301..b4eb3a6 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -14,6 +14,7 @@ from .contour import (contours_in_same_horizon, return_contours_of_image, return_parent_contours) + def return_x_start_end_mothers_childs_and_type_of_reading_order( x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): @@ -560,6 +561,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl ##print(len(peaks_neg_true)) return len(peaks_neg_true), peaks_neg_true + def find_num_col_only_image(regions_without_separators, multiplier=3.8): regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0) @@ -755,6 +757,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): return len(peaks_fin_true), peaks_fin_true + def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): regions_without_separators_0 = regions_without_separators[:, :, 0].sum(axis=0) @@ -768,6 +771,7 @@ def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): # print(peaks,'peaksnew') return peaks + def return_regions_without_separators(regions_pre): kernel = np.ones((5, 5), np.uint8) regions_without_separators = ((regions_pre[:, :] != 6) & @@ -782,6 +786,7 @@ def return_regions_without_separators(regions_pre): return regions_without_separators + def put_drop_out_from_only_drop_model(layout_no_patch, layout1): drop_only = (layout_no_patch[:, :, 0] == 4) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) @@ -815,6 +820,7 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): return layout_no_patch + def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label, text_regions_p): drop_only = (layout_in_patch[:, :, 0] == drop_capital_label) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) @@ -854,6 +860,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop return layout_in_patch + def check_any_text_region_in_model_one_is_main_or_header( regions_model_1, regions_model_full, contours_only_text_parent, @@ -1013,6 +1020,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light( contours_only_text_parent_main_d, contours_only_text_parent_head_d) + def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col): # print(textlines_con) # textlines_con=textlines_con.astype(np.uint32) @@ -1280,6 +1288,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): return final_indexers_sorted, matrix_of_orders, final_types, final_index_type + def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( img_p_in_ver, img_in_hor,num_col_classifier): @@ -1379,6 +1388,7 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( special_separators=[] return img_p_in[:,:,0], special_separators + def return_points_with_boundies(peaks_neg_fin, first_point, last_point): peaks_neg_tot = [first_point] for ii in range(len(peaks_neg_fin)): @@ -1386,6 +1396,7 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point): peaks_neg_tot.append(last_point) return peaks_neg_tot + def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): t_ins_c0 = time.time() separators_closeup= (region_pre_p[:, :, :] == pixel_lines) * 1 @@ -1596,6 +1607,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n + def return_boxes_of_images_by_order_of_reading_new( splitter_y_new, regions_without_separators, matrix_of_lines_ch, diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 009cee7..d2dcd5f 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -5,6 +5,7 @@ from shapely import geometry from .rotate import rotate_image, rotation_image_new + def contours_in_same_horizon(cy_main_hor): X1 = np.zeros((len(cy_main_hor), len(cy_main_hor))) X2 = np.zeros((len(cy_main_hor), len(cy_main_hor))) @@ -22,11 +23,13 @@ def contours_in_same_horizon(cy_main_hor): all_args.append(list(set(list_h))) return np.unique(np.array(all_args, dtype=object)) + def find_contours_mean_y_diff(contours_main): M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] return np.mean(np.diff(np.sort(np.array(cy_main)))) + def get_text_region_boxes_by_given_contours(contours): boxes = [] contours_new = [] @@ -37,6 +40,7 @@ def get_text_region_boxes_by_given_contours(contours): return boxes, contours_new + def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): found_polygons_early = [] for jv,c in enumerate(contours): @@ -51,6 +55,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area for point in polygon.exterior.coords], dtype=np.uint)) return found_polygons_early + def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): found_polygons_early = [] for jv,c in enumerate(contours): @@ -71,6 +76,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m for point in polygon.exterior.coords], dtype=np.int32)) return found_polygons_early + def find_new_features_of_contours(contours_main): areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) @@ -114,6 +120,7 @@ def find_new_features_of_contours(contours_main): return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin + def find_features_of_contours(contours_main): areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] @@ -127,12 +134,14 @@ def find_features_of_contours(contours_main): return y_min_main, y_max_main + def return_parent_contours(contours, hierarchy): contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1] return contours_parent + def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: @@ -150,6 +159,7 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): max_area=1, min_area=min_area) return contours_imgs + def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1)) @@ -166,6 +176,7 @@ def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): return cont_int[0], index_r_con + def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map): if not len(cnts): return [], [] @@ -176,6 +187,7 @@ def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map): cnts, range(len(cnts))) return tuple(zip(*results)) + def get_textregion_contours_in_org_image(cnts, img, slope_first): cnts_org = [] # print(cnts,'cnts') @@ -204,6 +216,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): return cnts_org + def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): zoom = 3 img = cv2.resize(img, (img.shape[1] // zoom, @@ -225,6 +238,7 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): return cnts_org + def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first): img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) @@ -239,6 +253,7 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first # print(np.shape(cont_int[0])) return cont_int[0], index_r_con + def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map): if not len(cnts): return [] @@ -254,6 +269,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map): contours, indexes = tuple(zip(*results)) return [i*6 for i in contours] + def return_contours_of_interested_textline(region_pre_p, pixel): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: @@ -271,6 +287,7 @@ def return_contours_of_interested_textline(region_pre_p, pixel): thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003) return contours_imgs + def return_contours_of_image(image): if len(image.shape) == 2: image = np.repeat(image[:, :, np.newaxis], 3, axis=2) @@ -282,6 +299,7 @@ def return_contours_of_image(image): contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return contours, hierarchy + def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: @@ -300,6 +318,7 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si return contours_imgs + def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): # pixels of images are identified by 5 if len(region_pre_p.shape) == 3: diff --git a/src/eynollah/utils/counter.py b/src/eynollah/utils/counter.py index e6205c8..eee2e30 100644 --- a/src/eynollah/utils/counter.py +++ b/src/eynollah/utils/counter.py @@ -3,6 +3,7 @@ from collections import Counter REGION_ID_TEMPLATE = 'region_%04d' LINE_ID_TEMPLATE = 'region_%04d_line_%04d' + class EynollahIdCounter: def __init__(self, region_idx=0, line_idx=0): diff --git a/src/eynollah/utils/drop_capitals.py b/src/eynollah/utils/drop_capitals.py index 67547d3..52a4e7d 100644 --- a/src/eynollah/utils/drop_capitals.py +++ b/src/eynollah/utils/drop_capitals.py @@ -7,6 +7,7 @@ from .contour import ( return_contours_of_interested_region, ) + def adhere_drop_capital_region_into_corresponding_textline( text_regions_p, polygons_of_drop_capitals, @@ -499,6 +500,7 @@ def adhere_drop_capital_region_into_corresponding_textline( #####pass return all_found_textline_polygons + def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1): drop_only = (layout_no_patch[:, :, 0] == 4) * 1 diff --git a/src/eynollah/utils/rotate.py b/src/eynollah/utils/rotate.py index 603c2d9..731814f 100644 --- a/src/eynollah/utils/rotate.py +++ b/src/eynollah/utils/rotate.py @@ -3,6 +3,7 @@ import math import imutils import cv2 + def rotatedRectWithMaxArea(w, h, angle): if w <= 0 or h <= 0: return 0, 0 @@ -25,6 +26,7 @@ def rotatedRectWithMaxArea(w, h, angle): return wr, hr + def rotate_max_area_new(image, rotated, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) h, w, _ = rotated.shape @@ -34,16 +36,19 @@ def rotate_max_area_new(image, rotated, angle): x2 = x1 + int(wr) return rotated[y1:y2, x1:x2] + def rotation_image_new(img, thetha): rotated = imutils.rotate(img, thetha) return rotate_max_area_new(img, rotated, thetha) + def rotate_image(img_patch, slope): (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, slope, 1.0) return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) + def rotate_image_different( img, slope): # img = cv2.imread('images/input.jpg') num_rows, num_cols = img.shape[:2] @@ -52,6 +57,7 @@ def rotate_image_different( img, slope): img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows)) return img_rotation + def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_table_prediction, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) h, w, _ = rotated.shape @@ -61,6 +67,7 @@ def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_ta x2 = x1 + int(wr) return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_table_prediction[y1:y2, x1:x2] + def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thetha): rotated = imutils.rotate(img, thetha) rotated_textline = imutils.rotate(textline, thetha) @@ -68,6 +75,7 @@ def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thet rotated_table_prediction = imutils.rotate(table_prediction, thetha) return rotate_max_area(img, rotated, rotated_textline, rotated_layout, rotated_table_prediction, thetha) + def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha): rotated = imutils.rotate(img, thetha) rotated_textline = imutils.rotate(textline, thetha) @@ -75,6 +83,7 @@ def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regio rotated_layout_full = imutils.rotate(text_regions_p_fully, thetha) return rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha) + def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) h, w, _ = rotated.shape diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index a32933d..5057c34 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -20,6 +20,7 @@ from . import ( crop_image_inside_box, ) + def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) @@ -131,6 +132,7 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): peaks, peaks_neg, rotation_matrix) + def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) @@ -604,6 +606,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): return peaks, textline_boxes_rot + def separate_lines_vertical(img_patch, contour_text_interest, thetha): thetha = thetha + 90 contour_text_interest_copy = contour_text_interest.copy() @@ -913,6 +916,7 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha): [int(x_min), int(point_down)]])) return peaks, textline_boxes_rot + def separate_lines_new_inside_tiles2(img_patch, thetha): (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) @@ -1255,6 +1259,7 @@ def separate_lines_new_inside_tiles(img_path, thetha): img_path = cv2.erode(img_path, kernel, iterations=2) return img_path + def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines): kernel = np.ones((5, 5), np.uint8) pixel = 255 @@ -1299,6 +1304,7 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i ##print(cont_final,'nadizzzz') return None, cont_final + def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = textline_mask.astype(np.uint8) @@ -1390,6 +1396,7 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest return contours_rotated_clean + def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None): if logger is None: logger = getLogger(__package__) @@ -1500,6 +1507,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl # plt.show() return img_patch_ineterst_revised + def do_image_rotation(angle, img, sigma_des, logger=None): if logger is None: logger = getLogger(__package__) @@ -1512,6 +1520,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None): var = 0 return var + def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, logger=None, plotter=None, map=map): if main_page and plotter: @@ -1568,6 +1577,7 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, return angle + def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): if logger is None: logger = getLogger(__package__) @@ -1583,6 +1593,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map angle = 0 return angle + def do_work_of_slopes_new( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, slope_deskew, @@ -1651,6 +1662,7 @@ def do_work_of_slopes_new( return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope + def do_work_of_slopes_new_curved( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew, @@ -1746,6 +1758,7 @@ def do_work_of_slopes_new_curved( return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope + def do_work_of_slopes_new_light( box_text, contour, contour_par, index_r_con, textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light, diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 2ce3757..ae12725 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -21,6 +21,7 @@ from ocrd_models.ocrd_page import ( ) import numpy as np + class EynollahXmlWriter: def __init__(self, *, dir_out, image_filename, curved_line,textline_light, pcgts=None): diff --git a/tests/test_dpi.py b/tests/test_dpi.py index 3376bf4..2f4e438 100644 --- a/tests/test_dpi.py +++ b/tests/test_dpi.py @@ -3,9 +3,11 @@ from pathlib import Path from eynollah.utils.pil_cv2 import check_dpi from tests.base import main + def test_dpi(): fpath = str(Path(__file__).parent.joinpath('resources', 'kant_aufklaerung_1784_0020.tif')) assert 230 == check_dpi(cv2.imread(fpath)) + if __name__ == '__main__': main(__file__) diff --git a/tests/test_run.py b/tests/test_run.py index cdb715a..859058e 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -8,6 +8,7 @@ testdir = Path(__file__).parent.resolve() EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve())) + class TestEynollahRun(TestCase): def test_full_run(self): @@ -20,5 +21,6 @@ class TestEynollahRun(TestCase): print(code, out, err) assert not code + if __name__ == '__main__': main(__file__) diff --git a/tests/test_xml.py b/tests/test_xml.py index 09a6ddf..ecbcaa8 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -4,11 +4,13 @@ from ocrd_models.ocrd_page import to_xml PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15' + def test_create_xml(): pcgts = create_page_xml('/path/to/img.tif', 100, 100) xmlstr = to_xml(pcgts) assert 'xmlns:pc="%s"' % PAGE_2019 in xmlstr assert 'Metadata' in xmlstr + if __name__ == '__main__': main([__file__])