pep8-e302: two blank lines between functions

code-suggestions
cneud 1 week ago
parent 0e8ea64ba4
commit caf0fbe90f

@ -4,10 +4,12 @@ from ocrd_utils import initLogging, setOverrideLogLevel
from eynollah.eynollah import Eynollah, Eynollah_ocr
from eynollah.sbb_binarize import SbbBinarizer
# Root click command group of the CLI; the sub-commands below register
# themselves on it through ``@main.command()``.
# NOTE(review): deliberately no docstring here -- click would surface it as
# the group's help text, which would change the CLI output.
@click.group()
def main():
    pass
@main.command()
@click.option(
"--dir_xml",
@ -49,6 +51,7 @@ def main():
def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size):
xml_files_ind = os.listdir(dir_xml)
@main.command()
@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.')
@ -78,10 +81,10 @@ def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out)
print("Error: You used -do to write out binarized images but have not set -di")
sys.exit(1)
SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out)
@main.command()
@click.option(
"--image",
@ -381,5 +384,6 @@ def ocr(dir_in, out, dir_xmls, model, tr_ocr, export_textline_images_and_text, d
)
eynollah_ocr.run()
if __name__ == "__main__":
main()

@ -163,6 +163,7 @@ class PatchEncoder(layers.Layer):
})
return config
class Eynollah:
def __init__(
self,

@ -14,6 +14,7 @@ from .contour import (contours_in_same_horizon,
return_contours_of_image,
return_parent_contours)
def return_x_start_end_mothers_childs_and_type_of_reading_order(
x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff):
@ -560,6 +561,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl
##print(len(peaks_neg_true))
return len(peaks_neg_true), peaks_neg_true
def find_num_col_only_image(regions_without_separators, multiplier=3.8):
regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0)
@ -755,6 +757,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8):
return len(peaks_fin_true), peaks_fin_true
def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8):
regions_without_separators_0 = regions_without_separators[:, :, 0].sum(axis=0)
@ -768,6 +771,7 @@ def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8):
# print(peaks,'peaksnew')
return peaks
def return_regions_without_separators(regions_pre):
kernel = np.ones((5, 5), np.uint8)
regions_without_separators = ((regions_pre[:, :] != 6) &
@ -782,6 +786,7 @@ def return_regions_without_separators(regions_pre):
return regions_without_separators
def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
drop_only = (layout_no_patch[:, :, 0] == 4) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
@ -815,6 +820,7 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
return layout_no_patch
def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label, text_regions_p):
drop_only = (layout_in_patch[:, :, 0] == drop_capital_label) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
@ -854,6 +860,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
return layout_in_patch
def check_any_text_region_in_model_one_is_main_or_header(
regions_model_1, regions_model_full,
contours_only_text_parent,
@ -1013,6 +1020,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
contours_only_text_parent_main_d,
contours_only_text_parent_head_d)
def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col):
# print(textlines_con)
# textlines_con=textlines_con.astype(np.uint32)
@ -1280,6 +1288,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
return final_indexers_sorted, matrix_of_orders, final_types, final_index_type
def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
img_p_in_ver, img_in_hor,num_col_classifier):
@ -1379,6 +1388,7 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(
special_separators=[]
return img_p_in[:,:,0], special_separators
def return_points_with_boundies(peaks_neg_fin, first_point, last_point):
peaks_neg_tot = [first_point]
for ii in range(len(peaks_neg_fin)):
@ -1386,6 +1396,7 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point):
peaks_neg_tot.append(last_point)
return peaks_neg_tot
def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None):
t_ins_c0 = time.time()
separators_closeup= (region_pre_p[:, :, :] == pixel_lines) * 1
@ -1596,6 +1607,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n
def return_boxes_of_images_by_order_of_reading_new(
splitter_y_new, regions_without_separators,
matrix_of_lines_ch,

@ -5,6 +5,7 @@ from shapely import geometry
from .rotate import rotate_image, rotation_image_new
def contours_in_same_horizon(cy_main_hor):
X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
@ -22,11 +23,13 @@ def contours_in_same_horizon(cy_main_hor):
all_args.append(list(set(list_h)))
return np.unique(np.array(all_args, dtype=object))
def find_contours_mean_y_diff(contours_main):
    """Return the mean gap between consecutive contour y-centroids.

    For every contour the ratio ``m01 / m00`` of its ``cv2.moments`` is
    computed (the centroid's y coordinate per the OpenCV moments
    documentation), the values are sorted, and the mean of the differences
    between neighbouring values is returned.

    Args:
        contours_main: sequence of contours accepted by ``cv2.moments``.

    Returns:
        The mean of the successive differences as computed by ``np.mean``.
        NOTE(review): with fewer than two contours ``np.diff`` is empty, so
        the result is ``nan`` -- confirm callers never hit that case.
    """
    moments = [cv2.moments(contour) for contour in contours_main]
    # The tiny epsilon keeps the division defined when ``m00`` is zero.
    cy_main = [m["m01"] / (m["m00"] + 1e-32) for m in moments]
    return np.mean(np.diff(np.sort(np.array(cy_main))))
def get_text_region_boxes_by_given_contours(contours):
boxes = []
contours_new = []
@ -37,6 +40,7 @@ def get_text_region_boxes_by_given_contours(contours):
return boxes, contours_new
def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area):
found_polygons_early = []
for jv,c in enumerate(contours):
@ -51,6 +55,7 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area
for point in polygon.exterior.coords], dtype=np.uint))
return found_polygons_early
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
found_polygons_early = []
for jv,c in enumerate(contours):
@ -71,6 +76,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
for point in polygon.exterior.coords], dtype=np.int32))
return found_polygons_early
def find_new_features_of_contours(contours_main):
areas_main = np.array([cv2.contourArea(contours_main[j])
for j in range(len(contours_main))])
@ -114,6 +120,7 @@ def find_new_features_of_contours(contours_main):
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
def find_features_of_contours(contours_main):
areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))]
@ -127,12 +134,14 @@ def find_features_of_contours(contours_main):
return y_min_main, y_max_main
def return_parent_contours(contours, hierarchy):
    """Return the contours whose hierarchy entry has ``-1`` at index 3.

    Args:
        contours: sequence of contours.
        hierarchy: nested structure indexed as ``hierarchy[0][i][3]`` for the
            ``i``-th contour. Presumably the hierarchy array returned by
            ``cv2.findContours``, where index 3 holds the parent index and
            ``-1`` means "no parent" -- verify against callers.

    Returns:
        A list with the selected contours, in their original order.
    """
    # ``hierarchy`` is only touched while iterating, so an empty ``contours``
    # never dereferences it (it may not be indexable in that case).
    return [contour
            for idx, contour in enumerate(contours)
            if hierarchy[0][idx][3] == -1]
def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
@ -150,6 +159,7 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
max_area=1, min_area=min_area)
return contours_imgs
def do_work_of_contours_in_image(contour, index_r_con, img, slope_first):
img_copy = np.zeros(img.shape)
img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1))
@ -166,6 +176,7 @@ def do_work_of_contours_in_image(contour, index_r_con, img, slope_first):
return cont_int[0], index_r_con
def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map):
if not len(cnts):
return [], []
@ -176,6 +187,7 @@ def get_textregion_contours_in_org_image_multi(cnts, img, slope_first, map=map):
cnts, range(len(cnts)))
return tuple(zip(*results))
def get_textregion_contours_in_org_image(cnts, img, slope_first):
cnts_org = []
# print(cnts,'cnts')
@ -204,6 +216,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
return cnts_org
def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first):
zoom = 3
img = cv2.resize(img, (img.shape[1] // zoom,
@ -225,6 +238,7 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first):
return cnts_org
def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first):
img_copy = np.zeros(img.shape)
img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1))
@ -239,6 +253,7 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
# print(np.shape(cont_int[0]))
return cont_int[0], index_r_con
def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map):
if not len(cnts):
return []
@ -254,6 +269,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first, map=map):
contours, indexes = tuple(zip(*results))
return [i*6 for i in contours]
def return_contours_of_interested_textline(region_pre_p, pixel):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
@ -271,6 +287,7 @@ def return_contours_of_interested_textline(region_pre_p, pixel):
thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003)
return contours_imgs
def return_contours_of_image(image):
if len(image.shape) == 2:
image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
@ -282,6 +299,7 @@ def return_contours_of_image(image):
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return contours, hierarchy
def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
@ -300,6 +318,7 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si
return contours_imgs
def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:

@ -3,6 +3,7 @@ from collections import Counter
REGION_ID_TEMPLATE = 'region_%04d'
LINE_ID_TEMPLATE = 'region_%04d_line_%04d'
class EynollahIdCounter:
def __init__(self, region_idx=0, line_idx=0):

@ -7,6 +7,7 @@ from .contour import (
return_contours_of_interested_region,
)
def adhere_drop_capital_region_into_corresponding_textline(
text_regions_p,
polygons_of_drop_capitals,
@ -499,6 +500,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
#####pass
return all_found_textline_polygons
def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
drop_only = (layout_no_patch[:, :, 0] == 4) * 1

@ -3,6 +3,7 @@ import math
import imutils
import cv2
def rotatedRectWithMaxArea(w, h, angle):
if w <= 0 or h <= 0:
return 0, 0
@ -25,6 +26,7 @@ def rotatedRectWithMaxArea(w, h, angle):
return wr, hr
def rotate_max_area_new(image, rotated, angle):
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
h, w, _ = rotated.shape
@ -34,16 +36,19 @@ def rotate_max_area_new(image, rotated, angle):
x2 = x1 + int(wr)
return rotated[y1:y2, x1:x2]
def rotation_image_new(img, thetha):
    """Rotate ``img`` by ``thetha`` and crop the result.

    The image is rotated with ``imutils.rotate``; the original image, the
    rotated image and the angle are then handed to ``rotate_max_area_new``,
    whose return value is passed through unchanged.
    """
    return rotate_max_area_new(img, imutils.rotate(img, thetha), thetha)
def rotate_image(img_patch, slope):
    """Rotate ``img_patch`` about its center without changing its size.

    Args:
        img_patch: image array; only its first two ``shape`` entries
            (height, width) are read here.
        slope: rotation angle forwarded to ``cv2.getRotationMatrix2D``
            (degrees, per the OpenCV documentation).

    Returns:
        The warped image produced by ``cv2.warpAffine`` with the same
        (width, height), cubic interpolation and replicated borders.
    """
    height, width = img_patch.shape[:2]
    pivot = (width // 2, height // 2)
    # Scale factor 1.0: pure rotation, no zoom.
    rotation = cv2.getRotationMatrix2D(pivot, slope, 1.0)
    return cv2.warpAffine(
        img_patch,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
def rotate_image_different( img, slope):
# img = cv2.imread('images/input.jpg')
num_rows, num_cols = img.shape[:2]
@ -52,6 +57,7 @@ def rotate_image_different( img, slope):
img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows))
return img_rotation
def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_table_prediction, angle):
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
h, w, _ = rotated.shape
@ -61,6 +67,7 @@ def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_ta
x2 = x1 + int(wr)
return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_table_prediction[y1:y2, x1:x2]
def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thetha):
rotated = imutils.rotate(img, thetha)
rotated_textline = imutils.rotate(textline, thetha)
@ -68,6 +75,7 @@ def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thet
rotated_table_prediction = imutils.rotate(table_prediction, thetha)
return rotate_max_area(img, rotated, rotated_textline, rotated_layout, rotated_table_prediction, thetha)
def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha):
rotated = imutils.rotate(img, thetha)
rotated_textline = imutils.rotate(textline, thetha)
@ -75,6 +83,7 @@ def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regio
rotated_layout_full = imutils.rotate(text_regions_p_fully, thetha)
return rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha)
def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle):
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
h, w, _ = rotated.shape

@ -20,6 +20,7 @@ from . import (
crop_image_inside_box,
)
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
(h, w) = img_patch.shape[:2]
center = (w // 2, h // 2)
@ -131,6 +132,7 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
peaks, peaks_neg,
rotation_matrix)
def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
(h, w) = img_patch.shape[:2]
center = (w // 2, h // 2)
@ -604,6 +606,7 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
return peaks, textline_boxes_rot
def separate_lines_vertical(img_patch, contour_text_interest, thetha):
thetha = thetha + 90
contour_text_interest_copy = contour_text_interest.copy()
@ -913,6 +916,7 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
[int(x_min), int(point_down)]]))
return peaks, textline_boxes_rot
def separate_lines_new_inside_tiles2(img_patch, thetha):
(h, w) = img_patch.shape[:2]
center = (w // 2, h // 2)
@ -1255,6 +1259,7 @@ def separate_lines_new_inside_tiles(img_path, thetha):
img_path = cv2.erode(img_path, kernel, iterations=2)
return img_path
def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines):
kernel = np.ones((5, 5), np.uint8)
pixel = 255
@ -1299,6 +1304,7 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
##print(cont_final,'nadizzzz')
return None, cont_final
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
textline_mask = textline_mask.astype(np.uint8)
@ -1390,6 +1396,7 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
return contours_rotated_clean
def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, plotter=None):
if logger is None:
logger = getLogger(__package__)
@ -1500,6 +1507,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, logger=None, pl
# plt.show()
return img_patch_ineterst_revised
def do_image_rotation(angle, img, sigma_des, logger=None):
if logger is None:
logger = getLogger(__package__)
@ -1512,6 +1520,7 @@ def do_image_rotation(angle, img, sigma_des, logger=None):
var = 0
return var
def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
main_page=False, logger=None, plotter=None, map=map):
if main_page and plotter:
@ -1568,6 +1577,7 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
return angle
def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map):
if logger is None:
logger = getLogger(__package__)
@ -1583,6 +1593,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
angle = 0
return angle
def do_work_of_slopes_new(
box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, slope_deskew,
@ -1651,6 +1662,7 @@ def do_work_of_slopes_new(
return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope
def do_work_of_slopes_new_curved(
box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, mask_texts_only, num_col, scale_par, slope_deskew,
@ -1746,6 +1758,7 @@ def do_work_of_slopes_new_curved(
return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope
def do_work_of_slopes_new_light(
box_text, contour, contour_par, index_r_con,
textline_mask_tot_ea, image_page_rotated, slope_deskew, textline_light,

@ -21,6 +21,7 @@ from ocrd_models.ocrd_page import (
)
import numpy as np
class EynollahXmlWriter:
def __init__(self, *, dir_out, image_filename, curved_line,textline_light, pcgts=None):

@ -3,9 +3,11 @@ from pathlib import Path
from eynollah.utils.pil_cv2 import check_dpi
from tests.base import main
def test_dpi():
    """``check_dpi`` must report 230 for the bundled sample TIFF."""
    resources = Path(__file__).parent / 'resources'
    image = cv2.imread(str(resources / 'kant_aufklaerung_1784_0020.tif'))
    assert 230 == check_dpi(image)
if __name__ == '__main__':
main(__file__)

@ -8,6 +8,7 @@ testdir = Path(__file__).parent.resolve()
EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve()))
class TestEynollahRun(TestCase):
def test_full_run(self):
@ -20,5 +21,6 @@ class TestEynollahRun(TestCase):
print(code, out, err)
assert not code
if __name__ == '__main__':
main(__file__)

@ -4,11 +4,13 @@ from ocrd_models.ocrd_page import to_xml
PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'
def test_create_xml():
    """A freshly created PAGE document serializes with the 2019 namespace and metadata."""
    xmlstr = to_xml(create_page_xml('/path/to/img.tif', 100, 100))
    namespace_decl = 'xmlns:pc="%s"' % PAGE_2019
    assert namespace_decl in xmlstr
    assert 'Metadata' in xmlstr
if __name__ == '__main__':
main([__file__])

Loading…
Cancel
Save