mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-30 22:49:52 +02:00
PEP 8 code style
This commit is contained in:
parent
dbee1a3084
commit
826d38b865
21 changed files with 3384 additions and 3105 deletions
|
@ -215,5 +215,6 @@ def main(
|
||||||
pcgts = eynollah.run()
|
pcgts = eynollah.run()
|
||||||
eynollah.writer.write_pagexml(pcgts)
|
eynollah.writer.write_pagexml(pcgts)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,10 +2,12 @@ from .processor import EynollahProcessor
|
||||||
from click import command
|
from click import command
|
||||||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||||
|
|
||||||
|
|
||||||
@command()
|
@command()
|
||||||
@ocrd_cli_options
|
@ocrd_cli_options
|
||||||
def main(*args, **kwargs):
|
def main(*args, **kwargs):
|
||||||
return ocrd_cli_wrap_processor(EynollahProcessor, *args, **kwargs)
|
return ocrd_cli_wrap_processor(EynollahProcessor, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -9,6 +9,7 @@ from .utils import crop_image_inside_box
|
||||||
from .utils.rotate import rotate_image_different
|
from .utils.rotate import rotate_image_different
|
||||||
from .utils.resize import resize_image
|
from .utils.resize import resize_image
|
||||||
|
|
||||||
|
|
||||||
class EynollahPlotter():
|
class EynollahPlotter():
|
||||||
"""
|
"""
|
||||||
Class collecting all the plotting and image writing methods
|
Class collecting all the plotting and image writing methods
|
||||||
|
@ -54,7 +55,6 @@ class EynollahPlotter():
|
||||||
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
|
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
|
||||||
plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout_main.png"))
|
plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout_main.png"))
|
||||||
|
|
||||||
|
|
||||||
def save_plot_of_layout_main_all(self, text_regions_p, image_page):
|
def save_plot_of_layout_main_all(self, text_regions_p, image_page):
|
||||||
if self.dir_of_all is not None:
|
if self.dir_of_all is not None:
|
||||||
values = np.unique(text_regions_p[:, :])
|
values = np.unique(text_regions_p[:, :])
|
||||||
|
@ -131,6 +131,7 @@ class EynollahPlotter():
|
||||||
cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page)
|
cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page)
|
||||||
if self.dir_save_page is not None:
|
if self.dir_save_page is not None:
|
||||||
cv2.imwrite(os.path.join(self.dir_save_page, self.image_filename_stem + "_page.png"), image_page)
|
cv2.imwrite(os.path.join(self.dir_save_page, self.image_filename_stem + "_page.png"), image_page)
|
||||||
|
|
||||||
def save_enhanced_image(self, img_res):
|
def save_enhanced_image(self, img_res):
|
||||||
cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem + "_enhanced.png"), img_res)
|
cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem + "_enhanced.png"), img_res)
|
||||||
|
|
||||||
|
@ -165,11 +166,10 @@ class EynollahPlotter():
|
||||||
for cont_ind in img_contours:
|
for cont_ind in img_contours:
|
||||||
x, y, w, h = cv2.boundingRect(cont_ind)
|
x, y, w, h = cv2.boundingRect(cont_ind)
|
||||||
box = [x, y, w, h]
|
box = [x, y, w, h]
|
||||||
croped_page, page_coord = crop_image_inside_box(box, image_page)
|
cropped_page, page_coord = crop_image_inside_box(box, image_page)
|
||||||
|
|
||||||
croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x))
|
cropped_page = resize_image(cropped_page, int(cropped_page.shape[0] / self.scale_y), int(cropped_page.shape[1] / self.scale_x))
|
||||||
|
|
||||||
path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg")
|
path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg")
|
||||||
cv2.imwrite(path, croped_page)
|
cv2.imwrite(path, cropped_page)
|
||||||
index += 1
|
index += 1
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ from .utils.pil_cv2 import pil2cv
|
||||||
|
|
||||||
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
|
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
|
||||||
|
|
||||||
|
|
||||||
class EynollahProcessor(Processor):
|
class EynollahProcessor(Processor):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,6 +5,8 @@ from shapely import geometry
|
||||||
from .rotate import rotate_image, rotation_image_new
|
from .rotate import rotate_image, rotation_image_new
|
||||||
from multiprocessing import Process, Queue, cpu_count
|
from multiprocessing import Process, Queue, cpu_count
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
|
|
||||||
|
|
||||||
def contours_in_same_horizon(cy_main_hor):
|
def contours_in_same_horizon(cy_main_hor):
|
||||||
X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
||||||
X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
||||||
|
@ -22,6 +24,7 @@ def contours_in_same_horizon(cy_main_hor):
|
||||||
all_args.append(list(set(list_h)))
|
all_args.append(list(set(list_h)))
|
||||||
return np.unique(np.array(all_args, dtype=object))
|
return np.unique(np.array(all_args, dtype=object))
|
||||||
|
|
||||||
|
|
||||||
def find_contours_mean_y_diff(contours_main):
|
def find_contours_mean_y_diff(contours_main):
|
||||||
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
||||||
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
||||||
|
@ -29,7 +32,6 @@ def find_contours_mean_y_diff(contours_main):
|
||||||
|
|
||||||
|
|
||||||
def get_text_region_boxes_by_given_contours(contours):
|
def get_text_region_boxes_by_given_contours(contours):
|
||||||
|
|
||||||
kernel = np.ones((5, 5), np.uint8)
|
kernel = np.ones((5, 5), np.uint8)
|
||||||
boxes = []
|
boxes = []
|
||||||
contours_new = []
|
contours_new = []
|
||||||
|
@ -42,6 +44,7 @@ def get_text_region_boxes_by_given_contours(contours):
|
||||||
del contours
|
del contours
|
||||||
return boxes, contours_new
|
return boxes, contours_new
|
||||||
|
|
||||||
|
|
||||||
def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area):
|
def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area):
|
||||||
found_polygons_early = list()
|
found_polygons_early = list()
|
||||||
|
|
||||||
|
@ -51,10 +54,12 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area
|
||||||
|
|
||||||
polygon = geometry.Polygon([point[0] for point in c])
|
polygon = geometry.Polygon([point[0] for point in c])
|
||||||
area = polygon.area
|
area = polygon.area
|
||||||
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1: # and hierarchy[0][jv][3]==-1 :
|
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and \
|
||||||
|
hierarchy[0][jv][3] == -1: # and hierarchy[0][jv][3]==-1 :
|
||||||
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint))
|
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint))
|
||||||
return found_polygons_early
|
return found_polygons_early
|
||||||
|
|
||||||
|
|
||||||
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
|
def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area):
|
||||||
found_polygons_early = list()
|
found_polygons_early = list()
|
||||||
|
|
||||||
|
@ -65,16 +70,17 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
|
||||||
polygon = geometry.Polygon([point[0] for point in c])
|
polygon = geometry.Polygon([point[0] for point in c])
|
||||||
# area = cv2.contourArea(c)
|
# area = cv2.contourArea(c)
|
||||||
area = polygon.area
|
area = polygon.area
|
||||||
##print(np.prod(thresh.shape[:2]))
|
# print(np.prod(thresh.shape[:2]))
|
||||||
# Check that polygon has area greater than minimal area
|
# Check that polygon has area greater than minimal area
|
||||||
# print(hierarchy[0][jv][3],hierarchy )
|
# print(hierarchy[0][jv][3],hierarchy )
|
||||||
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]): # and hierarchy[0][jv][3]==-1 :
|
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(
|
||||||
|
image.shape[:2]): # and hierarchy[0][jv][3]==-1 :
|
||||||
# print(c[0][0][1])
|
# print(c[0][0][1])
|
||||||
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
|
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
|
||||||
return found_polygons_early
|
return found_polygons_early
|
||||||
|
|
||||||
def find_new_features_of_contours(contours_main):
|
|
||||||
|
|
||||||
|
def find_new_features_of_contours(contours_main):
|
||||||
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
|
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
|
||||||
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
||||||
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
|
||||||
|
@ -85,7 +91,8 @@ def find_new_features_of_contours(contours_main):
|
||||||
argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
|
argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
|
||||||
|
|
||||||
x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] for j in range(len(contours_main))])
|
x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] for j in range(len(contours_main))])
|
||||||
y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] for j in range(len(contours_main))])
|
y_corr_x_min_from_argmin = np.array(
|
||||||
|
[contours_main[j][argmin_x_main[j], 0, 1] for j in range(len(contours_main))])
|
||||||
|
|
||||||
x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
|
x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
|
||||||
|
|
||||||
|
@ -107,9 +114,9 @@ def find_new_features_of_contours(contours_main):
|
||||||
# dis_x=np.abs(x_max_main-x_min_main)
|
# dis_x=np.abs(x_max_main-x_min_main)
|
||||||
|
|
||||||
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
|
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
|
||||||
|
|
||||||
|
|
||||||
def find_features_of_contours(contours_main):
|
def find_features_of_contours(contours_main):
|
||||||
|
|
||||||
|
|
||||||
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
|
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
|
||||||
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
|
||||||
cx_main = [(M_main[j]['m10'] / (M_main[j]['m00'] + 1e-32)) for j in range(len(M_main))]
|
cx_main = [(M_main[j]['m10'] / (M_main[j]['m00'] + 1e-32)) for j in range(len(M_main))]
|
||||||
|
@ -120,14 +127,15 @@ def find_features_of_contours(contours_main):
|
||||||
y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
|
y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
|
||||||
y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
|
y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
|
||||||
|
|
||||||
|
|
||||||
return y_min_main, y_max_main
|
return y_min_main, y_max_main
|
||||||
|
|
||||||
|
|
||||||
def return_parent_contours(contours, hierarchy):
|
def return_parent_contours(contours, hierarchy):
|
||||||
contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1]
|
contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1]
|
||||||
return contours_parent
|
return contours_parent
|
||||||
|
|
||||||
def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
|
|
||||||
|
|
||||||
|
def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
|
||||||
# pixels of images are identified by 5
|
# pixels of images are identified by 5
|
||||||
if len(region_pre_p.shape) == 3:
|
if len(region_pre_p.shape) == 3:
|
||||||
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
||||||
|
@ -145,6 +153,7 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
|
||||||
|
|
||||||
return contours_imgs
|
return contours_imgs
|
||||||
|
|
||||||
|
|
||||||
def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first):
|
def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first):
|
||||||
cnts_org_per_each_subprocess = []
|
cnts_org_per_each_subprocess = []
|
||||||
index_by_text_region_contours = []
|
index_by_text_region_contours = []
|
||||||
|
@ -165,14 +174,12 @@ def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, inde
|
||||||
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
|
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
|
||||||
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
|
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
|
||||||
|
|
||||||
|
|
||||||
cnts_org_per_each_subprocess.append(cont_int[0])
|
cnts_org_per_each_subprocess.append(cont_int[0])
|
||||||
|
|
||||||
queue_of_all_params.put([cnts_org_per_each_subprocess, index_by_text_region_contours])
|
queue_of_all_params.put([cnts_org_per_each_subprocess, index_by_text_region_contours])
|
||||||
|
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image_multi(cnts, img, slope_first):
|
def get_textregion_contours_in_org_image_multi(cnts, img, slope_first):
|
||||||
|
|
||||||
num_cores = cpu_count()
|
num_cores = cpu_count()
|
||||||
queue_of_all_params = Queue()
|
queue_of_all_params = Queue()
|
||||||
|
|
||||||
|
@ -200,6 +207,8 @@ def get_textregion_contours_in_org_image_multi(cnts, img, slope_first):
|
||||||
|
|
||||||
print(all_index_text_con)
|
print(all_index_text_con)
|
||||||
return cnts_org
|
return cnts_org
|
||||||
|
|
||||||
|
|
||||||
def loop_contour_image(index_l, cnts, img, slope_first):
|
def loop_contour_image(index_l, cnts, img, slope_first):
|
||||||
img_copy = np.zeros(img.shape)
|
img_copy = np.zeros(img.shape)
|
||||||
img_copy = cv2.fillPoly(img_copy, pts=[cnts[index_l]], color=(1, 1, 1))
|
img_copy = cv2.fillPoly(img_copy, pts=[cnts[index_l]], color=(1, 1, 1))
|
||||||
|
@ -209,7 +218,7 @@ def loop_contour_image(index_l, cnts,img, slope_first):
|
||||||
|
|
||||||
# print(img.shape,'img')
|
# print(img.shape,'img')
|
||||||
img_copy = rotation_image_new(img_copy, -slope_first)
|
img_copy = rotation_image_new(img_copy, -slope_first)
|
||||||
##print(img_copy.shape,'img_copy')
|
# print(img_copy.shape,'img_copy')
|
||||||
# plt.imshow(img_copy)
|
# plt.imshow(img_copy)
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
|
||||||
|
@ -224,8 +233,8 @@ def loop_contour_image(index_l, cnts,img, slope_first):
|
||||||
# print(np.shape(cont_int[0]))
|
# print(np.shape(cont_int[0]))
|
||||||
return cont_int[0]
|
return cont_int[0]
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first):
|
|
||||||
|
|
||||||
|
def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first):
|
||||||
cnts_org = []
|
cnts_org = []
|
||||||
# print(cnts,'cnts')
|
# print(cnts,'cnts')
|
||||||
with Pool(cpu_count()) as p:
|
with Pool(cpu_count()) as p:
|
||||||
|
@ -233,8 +242,8 @@ def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first):
|
||||||
|
|
||||||
return cnts_org
|
return cnts_org
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
|
||||||
|
|
||||||
|
def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
||||||
cnts_org = []
|
cnts_org = []
|
||||||
# print(cnts,'cnts')
|
# print(cnts,'cnts')
|
||||||
for i in range(len(cnts)):
|
for i in range(len(cnts)):
|
||||||
|
@ -246,7 +255,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
||||||
|
|
||||||
# print(img.shape,'img')
|
# print(img.shape,'img')
|
||||||
img_copy = rotation_image_new(img_copy, -slope_first)
|
img_copy = rotation_image_new(img_copy, -slope_first)
|
||||||
##print(img_copy.shape,'img_copy')
|
# print(img_copy.shape,'img_copy')
|
||||||
# plt.imshow(img_copy)
|
# plt.imshow(img_copy)
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
|
||||||
|
@ -263,13 +272,13 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
||||||
|
|
||||||
return cnts_org
|
return cnts_org
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image_light(cnts, img, slope_first):
|
|
||||||
|
|
||||||
|
def get_textregion_contours_in_org_image_light(cnts, img, slope_first):
|
||||||
h_o = img.shape[0]
|
h_o = img.shape[0]
|
||||||
w_o = img.shape[1]
|
w_o = img.shape[1]
|
||||||
|
|
||||||
img = cv2.resize(img, (int(img.shape[1] / 3.), int(img.shape[0] / 3.)), interpolation=cv2.INTER_NEAREST)
|
img = cv2.resize(img, (int(img.shape[1] / 3.), int(img.shape[0] / 3.)), interpolation=cv2.INTER_NEAREST)
|
||||||
##cnts = list( (np.array(cnts)/2).astype(np.int16) )
|
# cnts = list( (np.array(cnts)/2).astype(np.int16) )
|
||||||
# cnts = cnts/2
|
# cnts = cnts/2
|
||||||
cnts = [(i / 3).astype(np.int32) for i in cnts]
|
cnts = [(i / 3).astype(np.int32) for i in cnts]
|
||||||
cnts_org = []
|
cnts_org = []
|
||||||
|
@ -283,7 +292,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first):
|
||||||
|
|
||||||
# print(img.shape,'img')
|
# print(img.shape,'img')
|
||||||
img_copy = rotation_image_new(img_copy, -slope_first)
|
img_copy = rotation_image_new(img_copy, -slope_first)
|
||||||
##print(img_copy.shape,'img_copy')
|
# print(img_copy.shape,'img_copy')
|
||||||
# plt.imshow(img_copy)
|
# plt.imshow(img_copy)
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
|
||||||
|
@ -300,8 +309,8 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first):
|
||||||
|
|
||||||
return cnts_org
|
return cnts_org
|
||||||
|
|
||||||
def return_contours_of_interested_textline(region_pre_p, pixel):
|
|
||||||
|
|
||||||
|
def return_contours_of_interested_textline(region_pre_p, pixel):
|
||||||
# pixels of images are identified by 5
|
# pixels of images are identified by 5
|
||||||
if len(region_pre_p.shape) == 3:
|
if len(region_pre_p.shape) == 3:
|
||||||
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
||||||
|
@ -317,8 +326,8 @@ def return_contours_of_interested_textline(region_pre_p, pixel):
|
||||||
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003)
|
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003)
|
||||||
return contours_imgs
|
return contours_imgs
|
||||||
|
|
||||||
def return_contours_of_image(image):
|
|
||||||
|
|
||||||
|
def return_contours_of_image(image):
|
||||||
if len(image.shape) == 2:
|
if len(image.shape) == 2:
|
||||||
image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
|
image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
|
||||||
image = image.astype(np.uint8)
|
image = image.astype(np.uint8)
|
||||||
|
@ -329,8 +338,8 @@ def return_contours_of_image(image):
|
||||||
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||||
return contours, hierarchy
|
return contours, hierarchy
|
||||||
|
|
||||||
def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
|
|
||||||
|
|
||||||
|
def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
|
||||||
# pixels of images are identified by 5
|
# pixels of images are identified by 5
|
||||||
if len(region_pre_p.shape) == 3:
|
if len(region_pre_p.shape) == 3:
|
||||||
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
||||||
|
@ -348,8 +357,8 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si
|
||||||
|
|
||||||
return contours_imgs
|
return contours_imgs
|
||||||
|
|
||||||
def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
|
|
||||||
|
|
||||||
|
def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
|
||||||
# pixels of images are identified by 5
|
# pixels of images are identified by 5
|
||||||
if len(region_pre_p.shape) == 3:
|
if len(region_pre_p.shape) == 3:
|
||||||
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
|
||||||
|
@ -367,4 +376,3 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area,
|
||||||
img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3))
|
img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3))
|
||||||
img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1))
|
img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1))
|
||||||
return img_ret[:, :, 0]
|
return img_ret[:, :, 0]
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@ from collections import Counter
|
||||||
REGION_ID_TEMPLATE = 'region_%04d'
|
REGION_ID_TEMPLATE = 'region_%04d'
|
||||||
LINE_ID_TEMPLATE = 'region_%04d_line_%04d'
|
LINE_ID_TEMPLATE = 'region_%04d_line_%04d'
|
||||||
|
|
||||||
|
|
||||||
class EynollahIdCounter():
|
class EynollahIdCounter():
|
||||||
|
|
||||||
def __init__(self, region_idx=0, line_idx=0):
|
def __init__(self, region_idx=0, line_idx=0):
|
||||||
|
|
|
@ -6,6 +6,7 @@ from .contour import (
|
||||||
return_parent_contours,
|
return_parent_contours,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def adhere_drop_capital_region_into_corresponding_textline(
|
def adhere_drop_capital_region_into_corresponding_textline(
|
||||||
text_regions_p,
|
text_regions_p,
|
||||||
polygons_of_drop_capitals,
|
polygons_of_drop_capitals,
|
||||||
|
@ -44,7 +45,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
||||||
|
|
||||||
# plt.imshow(img_con[:,:,0])
|
# plt.imshow(img_con[:,:,0])
|
||||||
# plt.show()
|
# plt.show()
|
||||||
##img_con=cv2.dilate(img_con, kernel, iterations=30)
|
# img_con=cv2.dilate(img_con, kernel, iterations=30)
|
||||||
|
|
||||||
# plt.imshow(img_con[:,:,0])
|
# plt.imshow(img_con[:,:,0])
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
@ -185,7 +186,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
||||||
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
||||||
# print(np.shape(contours_biggest),'contours_biggest')
|
# print(np.shape(contours_biggest),'contours_biggest')
|
||||||
# print(np.shape(all_found_textline_polygons[int(region_final)][arg_min]))
|
# print(np.shape(all_found_textline_polygons[int(region_final)][arg_min]))
|
||||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
@ -230,7 +231,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
||||||
contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] # -all_box_coord[int(region_final)][2]
|
contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] # -all_box_coord[int(region_final)][2]
|
||||||
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0]
|
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0]
|
||||||
|
|
||||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||||
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest
|
||||||
# all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
# all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||||
|
|
||||||
|
@ -239,49 +240,49 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
# cx_t,cy_t ,_, _, _ ,_,_ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)])
|
||||||
###print(all_box_coord[j_cont])
|
# print(all_box_coord[j_cont])
|
||||||
###print(cx_t)
|
# print(cx_t)
|
||||||
###print(cy_t)
|
# print(cy_t)
|
||||||
###print(cx_d[i_drop])
|
# print(cx_d[i_drop])
|
||||||
###print(cy_d[i_drop])
|
# print(cy_d[i_drop])
|
||||||
##y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t)
|
# y_lines = all_box_coord[int(region_final)][0]+np.array(cy_t)
|
||||||
|
|
||||||
##y_lines[y_lines<y_min_d[i_drop]]=0
|
# y_lines[y_lines<y_min_d[i_drop]]=0
|
||||||
###print(y_lines)
|
# print(y_lines)
|
||||||
|
|
||||||
##arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
# arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
||||||
###print(arg_min)
|
# print(arg_min)
|
||||||
|
|
||||||
##cnt_nearest=np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
# cnt_nearest=np.copy(all_found_textline_polygons[int(region_final)][arg_min])
|
||||||
##cnt_nearest[:,0,0]=all_found_textline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
|
# cnt_nearest[:,0,0]=all_found_textline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
|
||||||
##cnt_nearest[:,0,1]=all_found_textline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
|
# cnt_nearest[:,0,1]=all_found_textline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
|
||||||
|
|
||||||
##img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
# img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||||
##img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
# img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
||||||
##img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
# img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||||
|
|
||||||
##img_textlines=img_textlines.astype(np.uint8)
|
# img_textlines=img_textlines.astype(np.uint8)
|
||||||
|
|
||||||
##plt.imshow(img_textlines)
|
# plt.imshow(img_textlines)
|
||||||
##plt.show()
|
# plt.show()
|
||||||
##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
||||||
##ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||||
|
|
||||||
##contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
# contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||||
|
|
||||||
##print(len(contours_combined),'len textlines mixed')
|
# print(len(contours_combined),'len textlines mixed')
|
||||||
##areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
# areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
||||||
|
|
||||||
##contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
|
# contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
|
||||||
|
|
||||||
###print(np.shape(contours_biggest))
|
# print(np.shape(contours_biggest))
|
||||||
###print(contours_biggest[:])
|
# print(contours_biggest[:])
|
||||||
##contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
|
# contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
|
||||||
##contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
|
||||||
|
|
||||||
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
|
||||||
##all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
# all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if len(region_with_intersected_drop) > 1:
|
if len(region_with_intersected_drop) > 1:
|
||||||
|
@ -399,71 +400,72 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
#####for i_drop in range(len(polygons_of_drop_capitals)):
|
# for i_drop in range(len(polygons_of_drop_capitals)):
|
||||||
#####for j_cont in range(len(contours_only_text_parent)):
|
# for j_cont in range(len(contours_only_text_parent)):
|
||||||
#####img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
# img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||||
#####img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
# img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||||
#####img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255))
|
# img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255))
|
||||||
|
|
||||||
#####img_con=img_con.astype(np.uint8)
|
# img_con=img_con.astype(np.uint8)
|
||||||
######imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY)
|
# imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY)
|
||||||
######ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||||
|
|
||||||
######contours_new,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
# contours_new,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||||
|
|
||||||
#####contours_new,hir_new=return_contours_of_image(img_con)
|
# contours_new,hir_new=return_contours_of_image(img_con)
|
||||||
#####contours_new_parent=return_parent_contours( contours_new,hir_new)
|
# contours_new_parent=return_parent_contours( contours_new,hir_new)
|
||||||
######plt.imshow(img_con)
|
# plt.imshow(img_con)
|
||||||
######plt.show()
|
# plt.show()
|
||||||
#####try:
|
# try:
|
||||||
#####if len(contours_new_parent)==1:
|
# if len(contours_new_parent)==1:
|
||||||
######print(all_found_textline_polygons[j_cont][0])
|
# print(all_found_textline_polygons[j_cont][0])
|
||||||
#####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont])
|
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont])
|
||||||
######print(all_box_coord[j_cont])
|
# print(all_box_coord[j_cont])
|
||||||
######print(cx_t)
|
# print(cx_t)
|
||||||
######print(cy_t)
|
# print(cy_t)
|
||||||
######print(cx_d[i_drop])
|
# print(cx_d[i_drop])
|
||||||
######print(cy_d[i_drop])
|
# print(cy_d[i_drop])
|
||||||
#####y_lines=all_box_coord[j_cont][0]+np.array(cy_t)
|
# y_lines=all_box_coord[j_cont][0]+np.array(cy_t)
|
||||||
|
|
||||||
######print(y_lines)
|
# print(y_lines)
|
||||||
|
|
||||||
#####arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
# arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
|
||||||
######print(arg_min)
|
# print(arg_min)
|
||||||
|
|
||||||
#####cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min])
|
# cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min])
|
||||||
#####cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
|
# cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
|
||||||
#####cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
|
# cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
|
||||||
|
|
||||||
#####img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
# img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
|
||||||
#####img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
# img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
|
||||||
#####img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
# img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
|
||||||
|
|
||||||
#####img_textlines=img_textlines.astype(np.uint8)
|
# img_textlines=img_textlines.astype(np.uint8)
|
||||||
#####imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
# imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
|
||||||
#####ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
# ret, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||||
|
|
||||||
#####contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
# contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
|
||||||
|
|
||||||
#####areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
# areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
|
||||||
|
|
||||||
#####contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
|
# contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
|
||||||
|
|
||||||
######print(np.shape(contours_biggest))
|
# print(np.shape(contours_biggest))
|
||||||
######print(contours_biggest[:])
|
# print(contours_biggest[:])
|
||||||
#####contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
|
# contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
|
||||||
#####contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
|
# contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
|
||||||
|
|
||||||
#####all_found_textline_polygons[j_cont][arg_min]=contours_biggest
|
# all_found_textline_polygons[j_cont][arg_min]=contours_biggest
|
||||||
######print(contours_biggest)
|
# print(contours_biggest)
|
||||||
######plt.imshow(img_textlines[:,:,0])
|
# plt.imshow(img_textlines[:,:,0])
|
||||||
######plt.show()
|
# plt.show()
|
||||||
#####else:
|
# else:
|
||||||
#####pass
|
# pass
|
||||||
#####except:
|
# except:
|
||||||
#####pass
|
# pass
|
||||||
return all_found_textline_polygons
|
return all_found_textline_polygons
|
||||||
|
|
||||||
|
|
||||||
def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
|
def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
|
||||||
|
|
||||||
drop_only = (layout_no_patch[:, :, 0] == 4) * 1
|
drop_only = (layout_no_patch[:, :, 0] == 4) * 1
|
||||||
|
|
|
@ -3,18 +3,17 @@ import cv2
|
||||||
from scipy.signal import find_peaks
|
from scipy.signal import find_peaks
|
||||||
from scipy.ndimage import gaussian_filter1d
|
from scipy.ndimage import gaussian_filter1d
|
||||||
|
|
||||||
|
|
||||||
from .contour import find_new_features_of_contours, return_contours_of_interested_region
|
from .contour import find_new_features_of_contours, return_contours_of_interested_region
|
||||||
from .resize import resize_image
|
from .resize import resize_image
|
||||||
from .rotate import rotate_image
|
from .rotate import rotate_image
|
||||||
|
|
||||||
|
|
||||||
def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None):
|
def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None):
|
||||||
mask_marginals = np.zeros((text_with_lines.shape[0], text_with_lines.shape[1]))
|
mask_marginals = np.zeros((text_with_lines.shape[0], text_with_lines.shape[1]))
|
||||||
mask_marginals = mask_marginals.astype(np.uint8)
|
mask_marginals = mask_marginals.astype(np.uint8)
|
||||||
|
|
||||||
|
|
||||||
text_with_lines = text_with_lines.astype(np.uint8)
|
text_with_lines = text_with_lines.astype(np.uint8)
|
||||||
##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
|
# text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
|
||||||
|
|
||||||
text_with_lines_eroded = cv2.erode(text_with_lines, kernel, iterations=5)
|
text_with_lines_eroded = cv2.erode(text_with_lines, kernel, iterations=5)
|
||||||
|
|
||||||
|
@ -29,7 +28,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
text_with_lines = cv2.erode(text_with_lines, kernel, iterations=7)
|
text_with_lines = cv2.erode(text_with_lines, kernel, iterations=7)
|
||||||
text_with_lines = resize_image(text_with_lines, text_with_lines_eroded.shape[0], text_with_lines_eroded.shape[1])
|
text_with_lines = resize_image(text_with_lines, text_with_lines_eroded.shape[0], text_with_lines_eroded.shape[1])
|
||||||
|
|
||||||
|
|
||||||
text_with_lines_y = text_with_lines.sum(axis=0)
|
text_with_lines_y = text_with_lines.sum(axis=0)
|
||||||
text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0)
|
text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0)
|
||||||
|
|
||||||
|
@ -44,21 +42,15 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
else:
|
else:
|
||||||
min_textline_thickness = 40
|
min_textline_thickness = 40
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if thickness_along_y_percent >= 14:
|
if thickness_along_y_percent >= 14:
|
||||||
|
|
||||||
text_with_lines_y_rev = -1 * text_with_lines_y[:]
|
text_with_lines_y_rev = -1 * text_with_lines_y[:]
|
||||||
# print(text_with_lines_y)
|
# print(text_with_lines_y)
|
||||||
# print(text_with_lines_y_rev)
|
# print(text_with_lines_y_rev)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# plt.plot(text_with_lines_y)
|
# plt.plot(text_with_lines_y)
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
|
||||||
|
|
||||||
text_with_lines_y_rev = text_with_lines_y_rev - np.min(text_with_lines_y_rev)
|
text_with_lines_y_rev = text_with_lines_y_rev - np.min(text_with_lines_y_rev)
|
||||||
|
|
||||||
# plt.plot(text_with_lines_y_rev)
|
# plt.plot(text_with_lines_y_rev)
|
||||||
|
@ -75,41 +67,32 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
first_nonzero = (next((i for i, x in enumerate(region_sum_0) if x), None))
|
first_nonzero = (next((i for i, x in enumerate(region_sum_0) if x), None))
|
||||||
last_nonzero = (next((i for i, x in enumerate(region_sum_0_updown) if x), None))
|
last_nonzero = (next((i for i, x in enumerate(region_sum_0_updown) if x), None))
|
||||||
|
|
||||||
|
|
||||||
last_nonzero = len(region_sum_0) - last_nonzero
|
last_nonzero = len(region_sum_0) - last_nonzero
|
||||||
|
|
||||||
##img_sum_0_smooth_rev=-region_sum_0
|
# img_sum_0_smooth_rev=-region_sum_0
|
||||||
|
|
||||||
|
|
||||||
mid_point = (last_nonzero + first_nonzero) / 2.
|
mid_point = (last_nonzero + first_nonzero) / 2.
|
||||||
|
|
||||||
|
|
||||||
one_third_right = (last_nonzero - mid_point) / 3.0
|
one_third_right = (last_nonzero - mid_point) / 3.0
|
||||||
one_third_left = (mid_point - first_nonzero) / 3.0
|
one_third_left = (mid_point - first_nonzero) / 3.0
|
||||||
|
|
||||||
# img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev)
|
# img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
peaks, _ = find_peaks(text_with_lines_y_rev, height=0)
|
peaks, _ = find_peaks(text_with_lines_y_rev, height=0)
|
||||||
|
|
||||||
|
|
||||||
peaks = np.array(peaks)
|
peaks = np.array(peaks)
|
||||||
|
|
||||||
|
|
||||||
# print(region_sum_0[peaks])
|
# print(region_sum_0[peaks])
|
||||||
##plt.plot(region_sum_0)
|
# plt.plot(region_sum_0)
|
||||||
##plt.plot(peaks,region_sum_0[peaks],'*')
|
# plt.plot(peaks,region_sum_0[peaks],'*')
|
||||||
##plt.show()
|
# plt.show()
|
||||||
# print(first_nonzero,last_nonzero,peaks)
|
# print(first_nonzero,last_nonzero,peaks)
|
||||||
peaks=peaks[(peaks>first_nonzero) & ((peaks<last_nonzero))]
|
peaks = peaks[(peaks > first_nonzero) & (peaks < last_nonzero)]
|
||||||
|
|
||||||
# print(first_nonzero,last_nonzero,peaks)
|
# print(first_nonzero,last_nonzero,peaks)
|
||||||
|
|
||||||
|
|
||||||
# print(region_sum_0[peaks]<10)
|
# print(region_sum_0[peaks]<10)
|
||||||
####peaks=peaks[region_sum_0[peaks]<25 ]
|
# peaks=peaks[region_sum_0[peaks]<25 ]
|
||||||
|
|
||||||
# print(region_sum_0[peaks])
|
# print(region_sum_0[peaks])
|
||||||
peaks = peaks[region_sum_0[peaks] < min_textline_thickness]
|
peaks = peaks[region_sum_0[peaks] < min_textline_thickness]
|
||||||
|
@ -123,21 +106,16 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
peaks_right = peaks[peaks > (mid_point + one_third_right)]
|
peaks_right = peaks[peaks > (mid_point + one_third_right)]
|
||||||
peaks_left = peaks[peaks < (mid_point - one_third_left)]
|
peaks_left = peaks[peaks < (mid_point - one_third_left)]
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
point_right = np.min(peaks_right)
|
point_right = np.min(peaks_right)
|
||||||
except:
|
except:
|
||||||
point_right = last_nonzero
|
point_right = last_nonzero
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
point_left = np.max(peaks_left)
|
point_left = np.max(peaks_left)
|
||||||
except:
|
except:
|
||||||
point_left = first_nonzero
|
point_left = first_nonzero
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# print(point_left,point_right)
|
# print(point_left,point_right)
|
||||||
# print(text_regions.shape)
|
# print(text_regions.shape)
|
||||||
if point_right >= mask_marginals.shape[1]:
|
if point_right >= mask_marginals.shape[1]:
|
||||||
|
@ -167,7 +145,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
if max_point_of_right_marginal >= text_regions.shape[1]:
|
if max_point_of_right_marginal >= text_regions.shape[1]:
|
||||||
max_point_of_right_marginal = text_regions.shape[1] - 1
|
max_point_of_right_marginal = text_regions.shape[1] - 1
|
||||||
|
|
||||||
|
|
||||||
# print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew')
|
# print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew')
|
||||||
# print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated')
|
# print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated')
|
||||||
# plt.imshow(mask_marginals)
|
# plt.imshow(mask_marginals)
|
||||||
|
@ -219,9 +196,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
if len(x_min_marginals_right) == 0:
|
if len(x_min_marginals_right) == 0:
|
||||||
x_min_marginals_right = [text_regions.shape[1] - 1]
|
x_min_marginals_right = [text_regions.shape[1] - 1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# print(x_min_marginals_left[0],x_min_marginals_right[0],'margo')
|
# print(x_min_marginals_left[0],x_min_marginals_right[0],'margo')
|
||||||
|
|
||||||
# print(marginlas_should_be_main_text,'marginlas_should_be_main_text')
|
# print(marginlas_should_be_main_text,'marginlas_should_be_main_text')
|
||||||
|
@ -235,14 +209,13 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N
|
||||||
text_regions[:, :int(min_point_of_left_marginal)][text_regions[:, :int(min_point_of_left_marginal)] == 1] = 0
|
text_regions[:, :int(min_point_of_left_marginal)][text_regions[:, :int(min_point_of_left_marginal)] == 1] = 0
|
||||||
text_regions[:, int(max_point_of_right_marginal):][text_regions[:, int(max_point_of_right_marginal):] == 1] = 0
|
text_regions[:, int(max_point_of_right_marginal):][text_regions[:, int(max_point_of_right_marginal):] == 1] = 0
|
||||||
|
|
||||||
###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4
|
# text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4
|
||||||
|
|
||||||
###text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4
|
# text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4
|
||||||
# plt.plot(region_sum_0)
|
# plt.plot(region_sum_0)
|
||||||
# plt.plot(peaks,region_sum_0[peaks],'*')
|
# plt.plot(peaks,region_sum_0[peaks],'*')
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
|
||||||
|
|
||||||
# plt.imshow(text_regions)
|
# plt.imshow(text_regions)
|
||||||
# plt.show()
|
# plt.show()
|
||||||
|
|
||||||
|
|
|
@ -5,15 +5,18 @@ from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, COLOR_BGR2RGB, cvtColor, imread
|
||||||
|
|
||||||
# from sbb_binarization
|
# from sbb_binarization
|
||||||
|
|
||||||
|
|
||||||
def cv2pil(img):
|
def cv2pil(img):
|
||||||
return Image.fromarray(np.array(cvtColor(img, COLOR_BGR2RGB)))
|
return Image.fromarray(np.array(cvtColor(img, COLOR_BGR2RGB)))
|
||||||
|
|
||||||
|
|
||||||
def pil2cv(img):
|
def pil2cv(img):
|
||||||
# from ocrd/workspace.py
|
# from ocrd/workspace.py
|
||||||
color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR
|
color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR
|
||||||
pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img)
|
pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img)
|
||||||
return cvtColor(pil_as_np_array, color_conversion)
|
return cvtColor(pil_as_np_array, color_conversion)
|
||||||
|
|
||||||
|
|
||||||
def check_dpi(img):
|
def check_dpi(img):
|
||||||
try:
|
try:
|
||||||
if isinstance(img, Image.Image):
|
if isinstance(img, Image.Image):
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import cv2
|
import cv2
|
||||||
|
|
||||||
|
|
||||||
def resize_image(img_in, input_height, input_width):
|
def resize_image(img_in, input_height, input_width):
|
||||||
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
|
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
|
||||||
|
|
|
@ -3,6 +3,7 @@ import math
|
||||||
import imutils
|
import imutils
|
||||||
import cv2
|
import cv2
|
||||||
|
|
||||||
|
|
||||||
def rotatedRectWithMaxArea(w, h, angle):
|
def rotatedRectWithMaxArea(w, h, angle):
|
||||||
if w <= 0 or h <= 0:
|
if w <= 0 or h <= 0:
|
||||||
return 0, 0
|
return 0, 0
|
||||||
|
@ -25,6 +26,7 @@ def rotatedRectWithMaxArea(w, h, angle):
|
||||||
|
|
||||||
return wr, hr
|
return wr, hr
|
||||||
|
|
||||||
|
|
||||||
def rotate_max_area_new(image, rotated, angle):
|
def rotate_max_area_new(image, rotated, angle):
|
||||||
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
|
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
|
||||||
h, w, _ = rotated.shape
|
h, w, _ = rotated.shape
|
||||||
|
@ -34,16 +36,19 @@ def rotate_max_area_new(image, rotated, angle):
|
||||||
x2 = x1 + int(wr)
|
x2 = x1 + int(wr)
|
||||||
return rotated[y1:y2, x1:x2]
|
return rotated[y1:y2, x1:x2]
|
||||||
|
|
||||||
|
|
||||||
def rotation_image_new(img, thetha):
|
def rotation_image_new(img, thetha):
|
||||||
rotated = imutils.rotate(img, thetha)
|
rotated = imutils.rotate(img, thetha)
|
||||||
return rotate_max_area_new(img, rotated, thetha)
|
return rotate_max_area_new(img, rotated, thetha)
|
||||||
|
|
||||||
|
|
||||||
def rotate_image(img_patch, slope):
|
def rotate_image(img_patch, slope):
|
||||||
(h, w) = img_patch.shape[:2]
|
(h, w) = img_patch.shape[:2]
|
||||||
center = (w // 2, h // 2)
|
center = (w // 2, h // 2)
|
||||||
M = cv2.getRotationMatrix2D(center, slope, 1.0)
|
M = cv2.getRotationMatrix2D(center, slope, 1.0)
|
||||||
return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
|
return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
|
||||||
|
|
||||||
|
|
||||||
def rotate_image_different(img, slope):
|
def rotate_image_different(img, slope):
|
||||||
# img = cv2.imread('images/input.jpg')
|
# img = cv2.imread('images/input.jpg')
|
||||||
num_rows, num_cols = img.shape[:2]
|
num_rows, num_cols = img.shape[:2]
|
||||||
|
@ -52,6 +57,7 @@ def rotate_image_different( img, slope):
|
||||||
img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows))
|
img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows))
|
||||||
return img_rotation
|
return img_rotation
|
||||||
|
|
||||||
|
|
||||||
def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_table_prediction, angle):
|
def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_table_prediction, angle):
|
||||||
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
|
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
|
||||||
h, w, _ = rotated.shape
|
h, w, _ = rotated.shape
|
||||||
|
@ -61,6 +67,7 @@ def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_ta
|
||||||
x2 = x1 + int(wr)
|
x2 = x1 + int(wr)
|
||||||
return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_table_prediction[y1:y2, x1:x2]
|
return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_table_prediction[y1:y2, x1:x2]
|
||||||
|
|
||||||
|
|
||||||
def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thetha):
|
def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thetha):
|
||||||
rotated = imutils.rotate(img, thetha)
|
rotated = imutils.rotate(img, thetha)
|
||||||
rotated_textline = imutils.rotate(textline, thetha)
|
rotated_textline = imutils.rotate(textline, thetha)
|
||||||
|
@ -68,6 +75,7 @@ def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thet
|
||||||
rotated_table_prediction = imutils.rotate(table_prediction, thetha)
|
rotated_table_prediction = imutils.rotate(table_prediction, thetha)
|
||||||
return rotate_max_area(img, rotated, rotated_textline, rotated_layout, rotated_table_prediction, thetha)
|
return rotate_max_area(img, rotated, rotated_textline, rotated_layout, rotated_table_prediction, thetha)
|
||||||
|
|
||||||
|
|
||||||
def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha):
|
def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha):
|
||||||
rotated = imutils.rotate(img, thetha)
|
rotated = imutils.rotate(img, thetha)
|
||||||
rotated_textline = imutils.rotate(textline, thetha)
|
rotated_textline = imutils.rotate(textline, thetha)
|
||||||
|
@ -75,6 +83,7 @@ def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regio
|
||||||
rotated_layout_full = imutils.rotate(text_regions_p_fully, thetha)
|
rotated_layout_full = imutils.rotate(text_regions_p_fully, thetha)
|
||||||
return rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha)
|
return rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha)
|
||||||
|
|
||||||
|
|
||||||
def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle):
|
def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle):
|
||||||
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
|
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
|
||||||
h, w, _ = rotated.shape
|
h, w, _ = rotated.shape
|
||||||
|
|
|
@ -17,6 +17,7 @@ from . import (
|
||||||
isNaN,
|
isNaN,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
||||||
(h, w) = img_patch.shape[:2]
|
(h, w) = img_patch.shape[:2]
|
||||||
center = (w // 2, h // 2)
|
center = (w // 2, h // 2)
|
||||||
|
@ -125,8 +126,8 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
||||||
|
|
||||||
return x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix
|
return x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix
|
||||||
|
|
||||||
def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|
||||||
|
|
||||||
|
def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
(h, w) = img_patch.shape[:2]
|
(h, w) = img_patch.shape[:2]
|
||||||
center = (w // 2, h // 2)
|
center = (w // 2, h // 2)
|
||||||
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
||||||
|
@ -170,20 +171,17 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e
|
y_padded_up_to_down_padded_e[20:len(y_padded_up_to_down_e) + 20] = y_padded_up_to_down_e
|
||||||
y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
|
y_padded_up_to_down_padded_e = gaussian_filter1d(y_padded_up_to_down_padded_e, 2)
|
||||||
|
|
||||||
|
|
||||||
peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
|
peaks_e, _ = find_peaks(y_padded_smoothed_e, height=0)
|
||||||
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
|
peaks_neg_e, _ = find_peaks(y_padded_up_to_down_padded_e, height=0)
|
||||||
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
|
neg_peaks_max = np.max(y_padded_up_to_down_padded_e[peaks_neg_e])
|
||||||
|
|
||||||
arg_neg_must_be_deleted= np.array(range(len(peaks_neg_e)))[y_padded_up_to_down_padded_e[peaks_neg_e]/float(neg_peaks_max)<0.3 ]
|
arg_neg_must_be_deleted = np.array(range(len(peaks_neg_e)))[
|
||||||
|
y_padded_up_to_down_padded_e[peaks_neg_e] / float(neg_peaks_max) < 0.3]
|
||||||
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
||||||
arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]
|
arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]
|
||||||
|
|
||||||
|
|
||||||
peaks_new = peaks_e[:]
|
peaks_new = peaks_e[:]
|
||||||
peaks_neg_new = peaks_neg_e[:]
|
peaks_neg_new = peaks_neg_e[:]
|
||||||
|
|
||||||
|
@ -192,10 +190,10 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
|
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0] + 1])
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0] + 1])
|
||||||
for i in range(len(arg_diff_cluster) - 1):
|
for i in range(len(arg_diff_cluster) - 1):
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1])
|
clusters_to_be_deleted.append(
|
||||||
|
arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:arg_diff_cluster[i + 1] + 1])
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:])
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:])
|
||||||
|
|
||||||
|
|
||||||
if len(clusters_to_be_deleted) > 0:
|
if len(clusters_to_be_deleted) > 0:
|
||||||
peaks_new_extra = []
|
peaks_new_extra = []
|
||||||
for m in range(len(clusters_to_be_deleted)):
|
for m in range(len(clusters_to_be_deleted)):
|
||||||
|
@ -214,11 +212,9 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
peaks_new_tot.append(i1)
|
peaks_new_tot.append(i1)
|
||||||
peaks_new_tot = np.sort(peaks_new_tot)
|
peaks_new_tot = np.sort(peaks_new_tot)
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
peaks_new_tot = peaks_e[:]
|
peaks_new_tot = peaks_e[:]
|
||||||
|
|
||||||
|
|
||||||
textline_con, hierarchy = return_contours_of_image(img_patch)
|
textline_con, hierarchy = return_contours_of_image(img_patch)
|
||||||
textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008)
|
textline_con_fil = filter_contours_area_of_image(img_patch, textline_con, hierarchy, max_area=1, min_area=0.0008)
|
||||||
y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil)
|
y_diff_mean = np.mean(np.diff(peaks_new_tot)) # self.find_contours_mean_y_diff(textline_con_fil)
|
||||||
|
@ -231,60 +227,46 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
sigma_gaus = 3
|
sigma_gaus = 3
|
||||||
# print(sigma_gaus,'sigma')
|
# print(sigma_gaus,'sigma')
|
||||||
|
|
||||||
|
|
||||||
y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus)
|
y_padded_smoothed = gaussian_filter1d(y_padded, sigma_gaus)
|
||||||
y_padded_up_to_down = -y_padded + np.max(y_padded)
|
y_padded_up_to_down = -y_padded + np.max(y_padded)
|
||||||
y_padded_up_to_down_padded = np.zeros(len(y_padded_up_to_down) + 40)
|
y_padded_up_to_down_padded = np.zeros(len(y_padded_up_to_down) + 40)
|
||||||
y_padded_up_to_down_padded[20:len(y_padded_up_to_down) + 20] = y_padded_up_to_down
|
y_padded_up_to_down_padded[20:len(y_padded_up_to_down) + 20] = y_padded_up_to_down
|
||||||
y_padded_up_to_down_padded = gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
|
y_padded_up_to_down_padded = gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus)
|
||||||
|
|
||||||
|
|
||||||
peaks, _ = find_peaks(y_padded_smoothed, height=0)
|
peaks, _ = find_peaks(y_padded_smoothed, height=0)
|
||||||
peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
|
peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
neg_peaks_max = np.max(y_padded_smoothed[peaks])
|
neg_peaks_max = np.max(y_padded_smoothed[peaks])
|
||||||
|
|
||||||
|
|
||||||
arg_neg_must_be_deleted = np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
|
arg_neg_must_be_deleted = np.array(range(len(peaks_neg)))[y_padded_up_to_down_padded[peaks_neg] / float(neg_peaks_max) < 0.42]
|
||||||
|
|
||||||
|
|
||||||
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
diff_arg_neg_must_be_deleted = np.diff(arg_neg_must_be_deleted)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
arg_diff = np.array(range(len(diff_arg_neg_must_be_deleted)))
|
||||||
arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]
|
arg_diff_cluster = arg_diff[diff_arg_neg_must_be_deleted > 1]
|
||||||
except:
|
except:
|
||||||
arg_neg_must_be_deleted = []
|
arg_neg_must_be_deleted = []
|
||||||
arg_diff_cluster = []
|
arg_diff_cluster = []
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
peaks_new = peaks[:]
|
peaks_new = peaks[:]
|
||||||
peaks_neg_new = peaks_neg[:]
|
peaks_neg_new = peaks_neg[:]
|
||||||
clusters_to_be_deleted = []
|
clusters_to_be_deleted = []
|
||||||
|
|
||||||
|
|
||||||
if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0:
|
if len(arg_diff_cluster) >= 2 and len(arg_diff_cluster) > 0:
|
||||||
|
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0] + 1])
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0:arg_diff_cluster[0] + 1])
|
||||||
for i in range(len(arg_diff_cluster) - 1):
|
for i in range(len(arg_diff_cluster) - 1):
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i]+1:arg_diff_cluster[i+1]+1])
|
clusters_to_be_deleted.append(
|
||||||
|
arg_neg_must_be_deleted[arg_diff_cluster[i] + 1:arg_diff_cluster[i + 1] + 1])
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:])
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:])
|
||||||
elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0:
|
elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0:
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[:])
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[:])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if len(arg_neg_must_be_deleted) == 1:
|
if len(arg_neg_must_be_deleted) == 1:
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted)
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted)
|
||||||
|
|
||||||
|
|
||||||
if len(clusters_to_be_deleted) > 0:
|
if len(clusters_to_be_deleted) > 0:
|
||||||
peaks_new_extra = []
|
peaks_new_extra = []
|
||||||
for m in range(len(clusters_to_be_deleted)):
|
for m in range(len(clusters_to_be_deleted)):
|
||||||
|
@ -303,26 +285,25 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
peaks_new_tot.append(i1)
|
peaks_new_tot.append(i1)
|
||||||
peaks_new_tot = np.sort(peaks_new_tot)
|
peaks_new_tot = np.sort(peaks_new_tot)
|
||||||
|
|
||||||
##plt.plot(y_padded_up_to_down_padded)
|
# plt.plot(y_padded_up_to_down_padded)
|
||||||
##plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
# plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
||||||
##plt.show()
|
# plt.show()
|
||||||
|
|
||||||
##plt.plot(y_padded_up_to_down_padded)
|
# plt.plot(y_padded_up_to_down_padded)
|
||||||
##plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
|
# plt.plot(peaks_neg_new,y_padded_up_to_down_padded[peaks_neg_new],'*')
|
||||||
##plt.show()
|
# plt.show()
|
||||||
|
|
||||||
##plt.plot(y_padded_smoothed)
|
# plt.plot(y_padded_smoothed)
|
||||||
##plt.plot(peaks,y_padded_smoothed[peaks],'*')
|
# plt.plot(peaks,y_padded_smoothed[peaks],'*')
|
||||||
##plt.show()
|
# plt.show()
|
||||||
|
|
||||||
##plt.plot(y_padded_smoothed)
|
# plt.plot(y_padded_smoothed)
|
||||||
##plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*')
|
# plt.plot(peaks_new_tot,y_padded_smoothed[peaks_new_tot],'*')
|
||||||
##plt.show()
|
# plt.show()
|
||||||
|
|
||||||
peaks = peaks_new_tot[:]
|
peaks = peaks_new_tot[:]
|
||||||
peaks_neg = peaks_neg_new[:]
|
peaks_neg = peaks_neg_new[:]
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
peaks_new_tot = peaks[:]
|
peaks_new_tot = peaks[:]
|
||||||
peaks = peaks_new_tot[:]
|
peaks = peaks_new_tot[:]
|
||||||
|
@ -330,12 +311,10 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
mean_value_of_peaks = np.mean(y_padded_smoothed[peaks])
|
mean_value_of_peaks = np.mean(y_padded_smoothed[peaks])
|
||||||
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
||||||
peaks_values = y_padded_smoothed[peaks]
|
peaks_values = y_padded_smoothed[peaks]
|
||||||
|
|
||||||
|
|
||||||
peaks_neg = peaks_neg - 20 - 20
|
peaks_neg = peaks_neg - 20 - 20
|
||||||
peaks = peaks - 20
|
peaks = peaks - 20
|
||||||
|
|
||||||
|
@ -347,8 +326,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
if peaks[jj] > len(x) - 1:
|
if peaks[jj] > len(x) - 1:
|
||||||
peaks[jj] = len(x) - 1
|
peaks[jj] = len(x) - 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
textline_boxes = []
|
textline_boxes = []
|
||||||
textline_boxes_rot = []
|
textline_boxes_rot = []
|
||||||
|
|
||||||
|
@ -360,34 +337,33 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||||
|
|
||||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.:
|
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.:
|
||||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||||
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
point_down = y_max_cont - 1 # peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||||
else:
|
else:
|
||||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||||
point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
point_down = y_max_cont - 1 # peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||||
|
|
||||||
point_down_narrow = peaks[jj] + first_nonzero + int(
|
point_down_narrow = peaks[jj] + first_nonzero + int(
|
||||||
1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
1.4 * dis_to_next_down) # -int(dis_to_next_down*1./2)
|
||||||
else:
|
else:
|
||||||
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
|
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
|
||||||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||||
|
|
||||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.:
|
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.:
|
||||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||||
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
point_down = peaks[jj] + first_nonzero + int(
|
||||||
|
1.1 * dis_to_next_down) # -int(dis_to_next_down*1./4.0)
|
||||||
else:
|
else:
|
||||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||||
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
point_down = peaks[jj] + first_nonzero + int(
|
||||||
|
1.33 * dis_to_next_down) # -int(dis_to_next_down*1./4.0)
|
||||||
|
|
||||||
point_down_narrow = peaks[jj] + first_nonzero + int(
|
point_down_narrow = peaks[jj] + first_nonzero + int(
|
||||||
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
1.1 * dis_to_next_down) # -int(dis_to_next_down*1./2)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if point_down_narrow >= img_patch.shape[0]:
|
if point_down_narrow >= img_patch.shape[0]:
|
||||||
point_down_narrow = img_patch.shape[0] - 2
|
point_down_narrow = img_patch.shape[0] - 2
|
||||||
|
|
||||||
|
|
||||||
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
|
distances = [cv2.pointPolygonTest(contour_text_interest_copy, tuple(int(x) for x in np.array([xv[mj], peaks[jj] + first_nonzero])), True)
|
||||||
for mj in range(len(xv))]
|
for mj in range(len(xv))]
|
||||||
distances = np.array(distances)
|
distances = np.array(distances)
|
||||||
|
@ -420,8 +396,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
if point_up_rot2 < 0:
|
if point_up_rot2 < 0:
|
||||||
point_up_rot2 = 0
|
point_up_rot2 = 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
x_min_rot1 = x_min_rot1 - x_help
|
x_min_rot1 = x_min_rot1 - x_help
|
||||||
x_max_rot2 = x_max_rot2 - x_help
|
x_max_rot2 = x_max_rot2 - x_help
|
||||||
x_max_rot3 = x_max_rot3 - x_help
|
x_max_rot3 = x_max_rot3 - x_help
|
||||||
|
@ -432,9 +406,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
point_down_rot3 = point_down_rot3 - y_help
|
point_down_rot3 = point_down_rot3 - y_help
|
||||||
point_down_rot4 = point_down_rot4 - y_help
|
point_down_rot4 = point_down_rot4 - y_help
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
|
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
|
||||||
[int(x_max_rot2), int(point_up_rot2)],
|
[int(x_max_rot2), int(point_up_rot2)],
|
||||||
[int(x_max_rot3), int(point_down_rot3)],
|
[int(x_max_rot3), int(point_down_rot3)],
|
||||||
|
@ -477,7 +448,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
|
x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
|
||||||
x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
|
x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
|
||||||
|
|
||||||
|
|
||||||
if x_min_rot1 < 0:
|
if x_min_rot1 < 0:
|
||||||
x_min_rot1 = 0
|
x_min_rot1 = 0
|
||||||
if x_min_rot4 < 0:
|
if x_min_rot4 < 0:
|
||||||
|
@ -487,7 +457,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
if point_up_rot2 < 0:
|
if point_up_rot2 < 0:
|
||||||
point_up_rot2 = 0
|
point_up_rot2 = 0
|
||||||
|
|
||||||
|
|
||||||
x_min_rot1 = x_min_rot1 - x_help
|
x_min_rot1 = x_min_rot1 - x_help
|
||||||
x_max_rot2 = x_max_rot2 - x_help
|
x_max_rot2 = x_max_rot2 - x_help
|
||||||
x_max_rot3 = x_max_rot3 - x_help
|
x_max_rot3 = x_max_rot3 - x_help
|
||||||
|
@ -498,9 +467,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
point_down_rot3 = point_down_rot3 - y_help
|
point_down_rot3 = point_down_rot3 - y_help
|
||||||
point_down_rot4 = point_down_rot4 - y_help
|
point_down_rot4 = point_down_rot4 - y_help
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
|
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
|
||||||
[int(x_max_rot2), int(point_up_rot2)],
|
[int(x_max_rot2), int(point_up_rot2)],
|
||||||
[int(x_max_rot3), int(point_down_rot3)],
|
[int(x_max_rot3), int(point_down_rot3)],
|
||||||
|
@ -512,7 +478,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
[int(x_min), int(y_max)]]))
|
[int(x_min), int(y_max)]]))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
elif len(peaks) == 2:
|
elif len(peaks) == 2:
|
||||||
dis_to_next = np.abs(peaks[1] - peaks[0])
|
dis_to_next = np.abs(peaks[1] - peaks[0])
|
||||||
for jj in range(len(peaks)):
|
for jj in range(len(peaks)):
|
||||||
|
@ -553,8 +518,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
|
x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d
|
||||||
x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
|
x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if x_min_rot1 < 0:
|
if x_min_rot1 < 0:
|
||||||
x_min_rot1 = 0
|
x_min_rot1 = 0
|
||||||
if x_min_rot4 < 0:
|
if x_min_rot4 < 0:
|
||||||
|
@ -574,9 +537,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
point_down_rot3 = point_down_rot3 - y_help
|
point_down_rot3 = point_down_rot3 - y_help
|
||||||
point_down_rot4 = point_down_rot4 - y_help
|
point_down_rot4 = point_down_rot4 - y_help
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
|
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
|
||||||
[int(x_max_rot2), int(point_up_rot2)],
|
[int(x_max_rot2), int(point_up_rot2)],
|
||||||
[int(x_max_rot3), int(point_down_rot3)],
|
[int(x_max_rot3), int(point_down_rot3)],
|
||||||
|
@ -644,7 +604,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
if point_up_rot2 < 0:
|
if point_up_rot2 < 0:
|
||||||
point_up_rot2 = 0
|
point_up_rot2 = 0
|
||||||
|
|
||||||
|
|
||||||
x_min_rot1 = x_min_rot1 - x_help
|
x_min_rot1 = x_min_rot1 - x_help
|
||||||
x_max_rot2 = x_max_rot2 - x_help
|
x_max_rot2 = x_max_rot2 - x_help
|
||||||
x_max_rot3 = x_max_rot3 - x_help
|
x_max_rot3 = x_max_rot3 - x_help
|
||||||
|
@ -655,9 +614,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
point_down_rot3 = point_down_rot3 - y_help
|
point_down_rot3 = point_down_rot3 - y_help
|
||||||
point_down_rot4 = point_down_rot4 - y_help
|
point_down_rot4 = point_down_rot4 - y_help
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
|
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
|
||||||
[int(x_max_rot2), int(point_up_rot2)],
|
[int(x_max_rot2), int(point_up_rot2)],
|
||||||
[int(x_max_rot3), int(point_down_rot3)],
|
[int(x_max_rot3), int(point_down_rot3)],
|
||||||
|
@ -668,15 +624,14 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
||||||
[int(x_max), int(point_down)],
|
[int(x_max), int(point_down)],
|
||||||
[int(x_min), int(point_down)]]))
|
[int(x_min), int(point_down)]]))
|
||||||
|
|
||||||
|
|
||||||
return peaks, textline_boxes_rot
|
return peaks, textline_boxes_rot
|
||||||
|
|
||||||
def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
|
||||||
|
|
||||||
|
def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
||||||
thetha = thetha + 90
|
thetha = thetha + 90
|
||||||
contour_text_interest_copy = contour_text_interest.copy()
|
contour_text_interest_copy = contour_text_interest.copy()
|
||||||
x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix = dedup_separate_lines(img_patch, contour_text_interest, thetha, 0)
|
x, y, x_d, y_d, xv, x_min_cont, y_min_cont, x_max_cont, y_max_cont, first_nonzero, y_padded_up_to_down_padded, y_padded_smoothed, peaks, peaks_neg, rotation_matrix = dedup_separate_lines(
|
||||||
|
img_patch, contour_text_interest, thetha, 0)
|
||||||
|
|
||||||
# plt.plot(y_padded_up_to_down_padded)
|
# plt.plot(y_padded_up_to_down_padded)
|
||||||
# plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
# plt.plot(peaks_neg,y_padded_up_to_down_padded[peaks_neg],'*')
|
||||||
|
@ -766,25 +721,25 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
||||||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||||
|
|
||||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
|
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
|
||||||
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||||
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
point_down = x_max_cont - 1 # peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||||
else:
|
else:
|
||||||
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||||
point_down = x_max_cont - 1 ##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
point_down = x_max_cont - 1 # peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
||||||
|
|
||||||
point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
point_down_narrow = peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) # -int(dis_to_next_down*1./2)
|
||||||
else:
|
else:
|
||||||
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
|
dis_to_next_up = abs(peaks[jj] - peaks_neg[jj])
|
||||||
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1])
|
||||||
|
|
||||||
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
|
if peaks_values[jj] > mean_value_of_peaks - std_value_of_peaks / 2.0:
|
||||||
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||||
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) # -int(dis_to_next_down*1./4.0)
|
||||||
else:
|
else:
|
||||||
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0)
|
point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) # +int(dis_to_next_up*1./4.0)
|
||||||
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0)
|
point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) # -int(dis_to_next_down*1./4.0)
|
||||||
|
|
||||||
point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
|
point_down_narrow = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) # -int(dis_to_next_down*1./2)
|
||||||
|
|
||||||
if point_down_narrow >= img_patch.shape[0]:
|
if point_down_narrow >= img_patch.shape[0]:
|
||||||
point_down_narrow = img_patch.shape[0] - 2
|
point_down_narrow = img_patch.shape[0] - 2
|
||||||
|
@ -820,9 +775,13 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
||||||
if point_up_rot2 < 0:
|
if point_up_rot2 < 0:
|
||||||
point_up_rot2 = 0
|
point_up_rot2 = 0
|
||||||
|
|
||||||
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]]))
|
textline_boxes_rot.append(np.array(
|
||||||
|
[[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)],
|
||||||
|
[int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]]))
|
||||||
|
|
||||||
textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]]))
|
textline_boxes.append(np.array(
|
||||||
|
[[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)],
|
||||||
|
[int(x_min), int(point_down)]]))
|
||||||
|
|
||||||
elif len(peaks) < 1:
|
elif len(peaks) < 1:
|
||||||
pass
|
pass
|
||||||
|
@ -853,9 +812,12 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
||||||
if point_up_rot2 < 0:
|
if point_up_rot2 < 0:
|
||||||
point_up_rot2 = 0
|
point_up_rot2 = 0
|
||||||
|
|
||||||
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]]))
|
textline_boxes_rot.append(np.array(
|
||||||
|
[[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)],
|
||||||
|
[int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]]))
|
||||||
|
|
||||||
textline_boxes.append(np.array([[int(x_min), int(y_min)], [int(x_max), int(y_min)], [int(x_max), int(y_max)], [int(x_min), int(y_max)]]))
|
textline_boxes.append(np.array(
|
||||||
|
[[int(x_min), int(y_min)], [int(x_max), int(y_min)], [int(x_max), int(y_max)], [int(x_min), int(y_max)]]))
|
||||||
|
|
||||||
elif len(peaks) == 2:
|
elif len(peaks) == 2:
|
||||||
dis_to_next = np.abs(peaks[1] - peaks[0])
|
dis_to_next = np.abs(peaks[1] - peaks[0])
|
||||||
|
@ -902,9 +864,13 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
||||||
if point_up_rot2 < 0:
|
if point_up_rot2 < 0:
|
||||||
point_up_rot2 = 0
|
point_up_rot2 = 0
|
||||||
|
|
||||||
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]]))
|
textline_boxes_rot.append(np.array(
|
||||||
|
[[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)],
|
||||||
|
[int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]]))
|
||||||
|
|
||||||
textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]]))
|
textline_boxes.append(np.array(
|
||||||
|
[[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)],
|
||||||
|
[int(x_min), int(point_down)]]))
|
||||||
else:
|
else:
|
||||||
for jj in range(len(peaks)):
|
for jj in range(len(peaks)):
|
||||||
|
|
||||||
|
@ -962,14 +928,18 @@ def separate_lines_vertical(img_patch, contour_text_interest, thetha):
|
||||||
if point_up_rot2 < 0:
|
if point_up_rot2 < 0:
|
||||||
point_up_rot2 = 0
|
point_up_rot2 = 0
|
||||||
|
|
||||||
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]]))
|
textline_boxes_rot.append(np.array(
|
||||||
|
[[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)],
|
||||||
|
[int(x_max_rot3), int(point_down_rot3)], [int(x_min_rot4), int(point_down_rot4)]]))
|
||||||
|
|
||||||
textline_boxes.append(np.array([[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)], [int(x_min), int(point_down)]]))
|
textline_boxes.append(np.array(
|
||||||
|
[[int(x_min), int(point_up)], [int(x_max), int(point_up)], [int(x_max), int(point_down)],
|
||||||
|
[int(x_min), int(point_down)]]))
|
||||||
|
|
||||||
return peaks, textline_boxes_rot
|
return peaks, textline_boxes_rot
|
||||||
|
|
||||||
def separate_lines_new_inside_tiles2(img_patch, thetha):
|
|
||||||
|
|
||||||
|
def separate_lines_new_inside_tiles2(img_patch, thetha):
|
||||||
(h, w) = img_patch.shape[:2]
|
(h, w) = img_patch.shape[:2]
|
||||||
center = (w // 2, h // 2)
|
center = (w // 2, h // 2)
|
||||||
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
|
||||||
|
@ -1094,7 +1064,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
||||||
|
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0: arg_diff_cluster[0] + 1])
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[0: arg_diff_cluster[0] + 1])
|
||||||
for i in range(len(arg_diff_cluster) - 1):
|
for i in range(len(arg_diff_cluster) - 1):
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[i] + 1 : arg_diff_cluster[i + 1] + 1])
|
clusters_to_be_deleted.append(
|
||||||
|
arg_neg_must_be_deleted[arg_diff_cluster[i] + 1: arg_diff_cluster[i + 1] + 1])
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:])
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[arg_diff_cluster[len(arg_diff_cluster) - 1] + 1:])
|
||||||
elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0:
|
elif len(arg_neg_must_be_deleted) >= 2 and len(arg_diff_cluster) == 0:
|
||||||
clusters_to_be_deleted.append(arg_neg_must_be_deleted[:])
|
clusters_to_be_deleted.append(arg_neg_must_be_deleted[:])
|
||||||
|
@ -1150,8 +1121,8 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
||||||
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
std_value_of_peaks = np.std(y_padded_smoothed[peaks])
|
||||||
peaks_values = y_padded_smoothed[peaks]
|
peaks_values = y_padded_smoothed[peaks]
|
||||||
|
|
||||||
###peaks_neg = peaks_neg - 20 - 20
|
# peaks_neg = peaks_neg - 20 - 20
|
||||||
###peaks = peaks - 20
|
# peaks = peaks - 20
|
||||||
peaks_neg_true = peaks_neg[:]
|
peaks_neg_true = peaks_neg[:]
|
||||||
peaks_pos_true = peaks[:]
|
peaks_pos_true = peaks[:]
|
||||||
|
|
||||||
|
@ -1172,17 +1143,18 @@ def separate_lines_new_inside_tiles2(img_patch, thetha):
|
||||||
peaks_pos_true = peaks_pos_true - 20
|
peaks_pos_true = peaks_pos_true - 20
|
||||||
|
|
||||||
for i in range(len(peaks_pos_true)):
|
for i in range(len(peaks_pos_true)):
|
||||||
##img_patch[peaks_pos_true[i]-8:peaks_pos_true[i]+8,:]=1
|
# img_patch[peaks_pos_true[i]-8:peaks_pos_true[i]+8,:]=1
|
||||||
img_patch[peaks_pos_true[i] - 6: peaks_pos_true[i] + 6, :] = 1
|
img_patch[peaks_pos_true[i] - 6: peaks_pos_true[i] + 6, :] = 1
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
kernel = np.ones((5, 5), np.uint8)
|
kernel = np.ones((5, 5), np.uint8)
|
||||||
|
|
||||||
# img_patch = cv2.erode(img_patch,kernel,iterations = 3)
|
# img_patch = cv2.erode(img_patch,kernel,iterations = 3)
|
||||||
#######################img_patch = cv2.erode(img_patch,kernel,iterations = 2)
|
# img_patch = cv2.erode(img_patch,kernel,iterations = 2)
|
||||||
img_patch = cv2.erode(img_patch, kernel, iterations=1)
|
img_patch = cv2.erode(img_patch, kernel, iterations=1)
|
||||||
return img_patch
|
return img_patch
|
||||||
|
|
||||||
|
|
||||||
def separate_lines_new_inside_tiles(img_path, thetha):
|
def separate_lines_new_inside_tiles(img_path, thetha):
|
||||||
(h, w) = img_path.shape[:2]
|
(h, w) = img_path.shape[:2]
|
||||||
center = (w // 2, h // 2)
|
center = (w // 2, h // 2)
|
||||||
|
@ -1202,8 +1174,8 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
||||||
|
|
||||||
mada_n = img_path.sum(axis=1)
|
mada_n = img_path.sum(axis=1)
|
||||||
|
|
||||||
##plt.plot(mada_n)
|
# plt.plot(mada_n)
|
||||||
##plt.show()
|
# plt.show()
|
||||||
|
|
||||||
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
|
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
|
||||||
|
|
||||||
|
@ -1326,6 +1298,7 @@ def separate_lines_new_inside_tiles(img_path, thetha):
|
||||||
img_path = cv2.erode(img_path, kernel, iterations=2)
|
img_path = cv2.erode(img_path, kernel, iterations=2)
|
||||||
return img_path
|
return img_path
|
||||||
|
|
||||||
|
|
||||||
def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines):
|
def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines):
|
||||||
kernel = np.ones((5, 5), np.uint8)
|
kernel = np.ones((5, 5), np.uint8)
|
||||||
pixel = 255
|
pixel = 255
|
||||||
|
@ -1346,7 +1319,7 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
|
||||||
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area)
|
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area)
|
||||||
|
|
||||||
cont_final = []
|
cont_final = []
|
||||||
###print(add_boxes_coor_into_textlines,'ikki')
|
# print(add_boxes_coor_into_textlines,'ikki')
|
||||||
for i in range(len(contours_imgs)):
|
for i in range(len(contours_imgs)):
|
||||||
img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3))
|
img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3))
|
||||||
img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255))
|
img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255))
|
||||||
|
@ -1358,21 +1331,20 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i
|
||||||
_, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
|
_, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
|
||||||
contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||||
|
|
||||||
##contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
|
# contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[
|
||||||
##0]
|
# 0]
|
||||||
##contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
|
# contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]
|
||||||
##if add_boxes_coor_into_textlines:
|
# if add_boxes_coor_into_textlines:
|
||||||
##print(np.shape(contours_text_rot[0]),'sjppo')
|
# print(np.shape(contours_text_rot[0]),'sjppo')
|
||||||
##contours_text_rot[0][:, 0, 0]=contours_text_rot[0][:, 0, 0] + box_ind[0]
|
# contours_text_rot[0][:, 0, 0]=contours_text_rot[0][:, 0, 0] + box_ind[0]
|
||||||
##contours_text_rot[0][:, 0, 1]=contours_text_rot[0][:, 0, 1] + box_ind[1]
|
# contours_text_rot[0][:, 0, 1]=contours_text_rot[0][:, 0, 1] + box_ind[1]
|
||||||
cont_final.append(contours_text_rot[0])
|
cont_final.append(contours_text_rot[0])
|
||||||
|
|
||||||
##print(cont_final,'nadizzzz')
|
# print(cont_final,'nadizzzz')
|
||||||
return None, cont_final
|
return None, cont_final
|
||||||
|
|
||||||
|
|
||||||
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
|
def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False):
|
||||||
|
|
||||||
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
|
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
|
||||||
textline_mask = textline_mask.astype(np.uint8)
|
textline_mask = textline_mask.astype(np.uint8)
|
||||||
kernel = np.ones((5, 5), np.uint8)
|
kernel = np.ones((5, 5), np.uint8)
|
||||||
|
@ -1461,8 +1433,8 @@ def textline_contours_postprocessing(textline_mask, slope, contour_text_interest
|
||||||
|
|
||||||
return contours_rotated_clean
|
return contours_rotated_clean
|
||||||
|
|
||||||
def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None):
|
|
||||||
|
|
||||||
|
def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None):
|
||||||
if num_col == 1:
|
if num_col == 1:
|
||||||
num_patches = int(img_path.shape[1] / 200.0)
|
num_patches = int(img_path.shape[1] / 200.0)
|
||||||
else:
|
else:
|
||||||
|
@ -1547,9 +1519,10 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None):
|
||||||
img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
|
img_int = np.zeros((img_xline.shape[0], img_xline.shape[1]))
|
||||||
img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0]
|
img_int[:, :] = img_xline[:, :] # img_patch_org[:,:,0]
|
||||||
|
|
||||||
img_resized = np.zeros((int(img_int.shape[0] * (1.2)), int(img_int.shape[1] * (3))))
|
img_resized = np.zeros((int(img_int.shape[0] * 1.2), int(img_int.shape[1] * 3)))
|
||||||
|
|
||||||
img_resized[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]] = img_int[:, :]
|
img_resized[int(img_int.shape[0] * 0.1): int(img_int.shape[0] * 0.1) + img_int.shape[0],
|
||||||
|
int(img_int.shape[1] * 1): int(img_int.shape[1] * 1) + img_int.shape[1]] = img_int[:, :]
|
||||||
# plt.imshow(img_xline)
|
# plt.imshow(img_xline)
|
||||||
# plt.show()
|
# plt.show()
|
||||||
img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i])
|
img_line_rotated = rotate_image(img_resized, slopes_tile_wise[i])
|
||||||
|
@ -1560,7 +1533,9 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None):
|
||||||
img_patch_separated_returned = rotate_image(img_patch_separated, -slopes_tile_wise[i])
|
img_patch_separated_returned = rotate_image(img_patch_separated, -slopes_tile_wise[i])
|
||||||
img_patch_separated_returned[:, :][img_patch_separated_returned[:, :] != 0] = 1
|
img_patch_separated_returned[:, :][img_patch_separated_returned[:, :] != 0] = 1
|
||||||
|
|
||||||
img_patch_separated_returned_true_size = img_patch_separated_returned[int(img_int.shape[0] * (0.1)) : int(img_int.shape[0] * (0.1)) + img_int.shape[0], int(img_int.shape[1] * (1)) : int(img_int.shape[1] * (1)) + img_int.shape[1]]
|
img_patch_separated_returned_true_size = img_patch_separated_returned[
|
||||||
|
int(img_int.shape[0] * 0.1): int(img_int.shape[0] * 0.1) + img_int.shape[0],
|
||||||
|
int(img_int.shape[1] * 1): int(img_int.shape[1] * 1) + img_int.shape[1]]
|
||||||
|
|
||||||
img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin: length_x - margin]
|
img_patch_separated_returned_true_size = img_patch_separated_returned_true_size[:, margin: length_x - margin]
|
||||||
img_patch_ineterst_revised[:, index_x_d + margin: index_x_u - margin] = img_patch_separated_returned_true_size
|
img_patch_ineterst_revised[:, index_x_d + margin: index_x_u - margin] = img_patch_separated_returned_true_size
|
||||||
|
@ -1569,28 +1544,22 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None):
|
||||||
# plt.show()
|
# plt.show()
|
||||||
return img_patch_ineterst_revised
|
return img_patch_ineterst_revised
|
||||||
|
|
||||||
def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
|
||||||
|
|
||||||
|
def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
if main_page and plotter:
|
if main_page and plotter:
|
||||||
plotter.save_plot_of_textline_density(img_patch_org)
|
plotter.save_plot_of_textline_density(img_patch_org)
|
||||||
|
|
||||||
img_int = np.zeros((img_patch_org.shape[0], img_patch_org.shape[1]))
|
img_int = np.zeros((img_patch_org.shape[0], img_patch_org.shape[1]))
|
||||||
img_int[:, :] = img_patch_org[:, :] # img_patch_org[:,:,0]
|
img_int[:, :] = img_patch_org[:, :] # img_patch_org[:,:,0]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
max_shape = np.max(img_int.shape)
|
max_shape = np.max(img_int.shape)
|
||||||
img_resized=np.zeros((int( max_shape*(1.1) ) , int( max_shape*(1.1) ) ))
|
img_resized = np.zeros((int(max_shape * 1.1), int(max_shape * 1.1)))
|
||||||
|
|
||||||
|
|
||||||
onset_x = int((img_resized.shape[1] - img_int.shape[1]) / 2.)
|
onset_x = int((img_resized.shape[1] - img_int.shape[1]) / 2.)
|
||||||
onset_y = int((img_resized.shape[0] - img_int.shape[0]) / 2.)
|
onset_y = int((img_resized.shape[0] - img_int.shape[0]) / 2.)
|
||||||
|
|
||||||
|
|
||||||
# img_resized = np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
|
# img_resized = np.zeros((int( img_int.shape[0]*(1.8) ) , int( img_int.shape[1]*(2.6) ) ))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# img_resized[int(img_int.shape[0]*(.4)):int(img_int.shape[0]*(.4))+img_int.shape[0], int(img_int.shape[1]*(.8)):int(img_int.shape[1]*(.8))+img_int.shape[1]]=img_int[:,:]
|
# img_resized[int(img_int.shape[0]*(.4)):int(img_int.shape[0]*(.4))+img_int.shape[0], int(img_int.shape[1]*(.8)):int(img_int.shape[1]*(.8))+img_int.shape[1]]=img_int[:,:]
|
||||||
img_resized[onset_y:onset_y + img_int.shape[0], onset_x:onset_x + img_int.shape[1]] = img_int[:, :]
|
img_resized[onset_y:onset_y + img_int.shape[0], onset_x:onset_x + img_int.shape[1]] = img_int[:, :]
|
||||||
|
|
||||||
|
@ -1615,24 +1584,24 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
# print(var_spectrum,'var_spectrum')
|
# print(var_spectrum,'var_spectrum')
|
||||||
try:
|
try:
|
||||||
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
||||||
##print(rot,var_spectrum,'var_spectrum')
|
# print(rot,var_spectrum,'var_spectrum')
|
||||||
except:
|
except:
|
||||||
var_spectrum = 0
|
var_spectrum = 0
|
||||||
var_res.append(var_spectrum)
|
var_res.append(var_spectrum)
|
||||||
try:
|
try:
|
||||||
var_res = np.array(var_res)
|
var_res = np.array(var_res)
|
||||||
ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
|
ang_int = angels[np.argmax(
|
||||||
|
var_res)] # angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin]
|
||||||
except:
|
except:
|
||||||
ang_int = 0
|
ang_int = 0
|
||||||
|
|
||||||
|
|
||||||
angels = np.linspace(ang_int - 22.5, ang_int + 22.5, 100)
|
angels = np.linspace(ang_int - 22.5, ang_int + 22.5, 100)
|
||||||
|
|
||||||
var_res = []
|
var_res = []
|
||||||
for rot in angels:
|
for rot in angels:
|
||||||
img_rot = rotate_image(img_resized, rot)
|
img_rot = rotate_image(img_resized, rot)
|
||||||
##plt.imshow(img_rot)
|
# plt.imshow(img_rot)
|
||||||
##plt.show()
|
# plt.show()
|
||||||
img_rot[img_rot != 0] = 1
|
img_rot[img_rot != 0] = 1
|
||||||
try:
|
try:
|
||||||
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
||||||
|
@ -1651,7 +1620,6 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
# plt.show()
|
# plt.show()
|
||||||
angels = np.linspace(-12, 12, 100) # np.array([0 , 45 , 90 , -45])
|
angels = np.linspace(-12, 12, 100) # np.array([0 , 45 , 90 , -45])
|
||||||
|
|
||||||
|
|
||||||
var_res = []
|
var_res = []
|
||||||
|
|
||||||
for rot in angels:
|
for rot in angels:
|
||||||
|
@ -1669,7 +1637,6 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
|
|
||||||
var_res.append(var_spectrum)
|
var_res.append(var_spectrum)
|
||||||
|
|
||||||
|
|
||||||
if plotter:
|
if plotter:
|
||||||
plotter.save_plot_of_rotation_angle(angels, var_res)
|
plotter.save_plot_of_rotation_angle(angels, var_res)
|
||||||
try:
|
try:
|
||||||
|
@ -1684,8 +1651,8 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
var_res = []
|
var_res = []
|
||||||
for rot in angels:
|
for rot in angels:
|
||||||
img_rot = rotate_image(img_resized, rot)
|
img_rot = rotate_image(img_resized, rot)
|
||||||
##plt.imshow(img_rot)
|
# plt.imshow(img_rot)
|
||||||
##plt.show()
|
# plt.show()
|
||||||
img_rot[img_rot != 0] = 1
|
img_rot[img_rot != 0] = 1
|
||||||
try:
|
try:
|
||||||
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
||||||
|
@ -1704,8 +1671,8 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
var_res = []
|
var_res = []
|
||||||
for rot in angels:
|
for rot in angels:
|
||||||
img_rot = rotate_image(img_resized, rot)
|
img_rot = rotate_image(img_resized, rot)
|
||||||
##plt.imshow(img_rot)
|
# plt.imshow(img_rot)
|
||||||
##plt.show()
|
# plt.show()
|
||||||
img_rot[img_rot != 0] = 1
|
img_rot[img_rot != 0] = 1
|
||||||
try:
|
try:
|
||||||
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
||||||
|
@ -1742,8 +1709,8 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
|
|
||||||
# plt.plot(var_res)
|
# plt.plot(var_res)
|
||||||
# plt.show()
|
# plt.show()
|
||||||
##plt.plot(mom3_res)
|
# plt.plot(mom3_res)
|
||||||
##plt.show()
|
# plt.show()
|
||||||
# print(ang_int,'ang_int111')
|
# print(ang_int,'ang_int111')
|
||||||
|
|
||||||
early_slope_edge = 22
|
early_slope_edge = 22
|
||||||
|
@ -1755,8 +1722,8 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
|
|
||||||
for rot in angels:
|
for rot in angels:
|
||||||
img_rot = rotate_image(img_resized, rot)
|
img_rot = rotate_image(img_resized, rot)
|
||||||
##plt.imshow(img_rot)
|
# plt.imshow(img_rot)
|
||||||
##plt.show()
|
# plt.show()
|
||||||
img_rot[img_rot != 0] = 1
|
img_rot[img_rot != 0] = 1
|
||||||
try:
|
try:
|
||||||
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
||||||
|
@ -1779,8 +1746,8 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
indexer = 0
|
indexer = 0
|
||||||
for rot in angels:
|
for rot in angels:
|
||||||
img_rot = rotate_image(img_resized, rot)
|
img_rot = rotate_image(img_resized, rot)
|
||||||
##plt.imshow(img_rot)
|
# plt.imshow(img_rot)
|
||||||
##plt.show()
|
# plt.show()
|
||||||
img_rot[img_rot != 0] = 1
|
img_rot[img_rot != 0] = 1
|
||||||
try:
|
try:
|
||||||
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
var_spectrum = find_num_col_deskew(img_rot, sigma_des, 20.3)
|
||||||
|
@ -1796,4 +1763,3 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None):
|
||||||
ang_int = 0
|
ang_int = 0
|
||||||
|
|
||||||
return ang_int
|
return ang_int
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,7 @@ from ocrd_models.ocrd_page import (
|
||||||
|
|
||||||
to_xml)
|
to_xml)
|
||||||
|
|
||||||
|
|
||||||
def create_page_xml(imageFilename, height, width):
|
def create_page_xml(imageFilename, height, width):
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
pcgts = PcGtsType(
|
pcgts = PcGtsType(
|
||||||
|
@ -46,6 +47,7 @@ def create_page_xml(imageFilename, height, width):
|
||||||
))
|
))
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
||||||
|
|
||||||
def xml_reading_order(page, order_of_texts, id_of_marginalia):
|
def xml_reading_order(page, order_of_texts, id_of_marginalia):
|
||||||
region_order = ReadingOrderType()
|
region_order = ReadingOrderType()
|
||||||
og = OrderedGroupType(id="ro357564684568544579089")
|
og = OrderedGroupType(id="ro357564684568544579089")
|
||||||
|
@ -59,6 +61,7 @@ def xml_reading_order(page, order_of_texts, id_of_marginalia):
|
||||||
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
|
og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal))
|
||||||
region_counter.inc('region')
|
region_counter.inc('region')
|
||||||
|
|
||||||
|
|
||||||
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
|
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
|
||||||
indexes_sorted = np.array(indexes_sorted)
|
indexes_sorted = np.array(indexes_sorted)
|
||||||
index_of_types = np.array(index_of_types)
|
index_of_types = np.array(index_of_types)
|
||||||
|
|
|
@ -20,6 +20,7 @@ from ocrd_models.ocrd_page import (
|
||||||
)
|
)
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class EynollahXmlWriter():
|
class EynollahXmlWriter():
|
||||||
|
|
||||||
def __init__(self, *, dir_out, image_filename, curved_line, textline_light, pcgts=None):
|
def __init__(self, *, dir_out, image_filename, curved_line, textline_light, pcgts=None):
|
||||||
|
@ -140,7 +141,11 @@ class EynollahXmlWriter():
|
||||||
with open(out_fname, 'w') as f:
|
with open(out_fname, 'w') as f:
|
||||||
f.write(to_xml(pcgts))
|
f.write(to_xml(pcgts))
|
||||||
|
|
||||||
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables):
|
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts,
|
||||||
|
all_found_textline_polygons, all_box_coord, found_polygons_text_region_img,
|
||||||
|
found_polygons_marginals, all_found_textline_polygons_marginals,
|
||||||
|
all_box_coord_marginals, slopes, slopes_marginals, cont_page,
|
||||||
|
polygons_lines_to_be_written_in_xml, found_polygons_tables):
|
||||||
self.logger.debug('enter build_pagexml_no_full_layout')
|
self.logger.debug('enter build_pagexml_no_full_layout')
|
||||||
|
|
||||||
# create the file structure
|
# create the file structure
|
||||||
|
@ -156,8 +161,7 @@ class EynollahXmlWriter():
|
||||||
|
|
||||||
for mm in range(len(found_polygons_text_region)):
|
for mm in range(len(found_polygons_text_region)):
|
||||||
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
|
||||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)),
|
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)),)
|
||||||
)
|
|
||||||
page.add_TextRegion(textregion)
|
page.add_TextRegion(textregion)
|
||||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter)
|
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter)
|
||||||
|
|
||||||
|
@ -209,7 +213,13 @@ class EynollahXmlWriter():
|
||||||
|
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
||||||
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml):
|
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord,
|
||||||
|
order_of_texts, id_of_texts, all_found_textline_polygons,
|
||||||
|
all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
|
||||||
|
found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals,
|
||||||
|
found_polygons_marginals, all_found_textline_polygons_marginals,
|
||||||
|
all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page,
|
||||||
|
polygons_lines_to_be_written_in_xml):
|
||||||
self.logger.debug('enter build_pagexml_full_layout')
|
self.logger.debug('enter build_pagexml_full_layout')
|
||||||
|
|
||||||
# create the file structure
|
# create the file structure
|
||||||
|
@ -246,13 +256,16 @@ class EynollahXmlWriter():
|
||||||
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))))
|
Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))))
|
||||||
|
|
||||||
for mm in range(len(found_polygons_text_region_img)):
|
for mm in range(len(found_polygons_text_region_img)):
|
||||||
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
|
page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(
|
||||||
|
points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
|
||||||
|
|
||||||
for mm in range(len(polygons_lines_to_be_written_in_xml)):
|
for mm in range(len(polygons_lines_to_be_written_in_xml)):
|
||||||
page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
|
page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(
|
||||||
|
points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0, 0, 0, 0]))))
|
||||||
|
|
||||||
for mm in range(len(found_polygons_tables)):
|
for mm in range(len(found_polygons_tables)):
|
||||||
page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))
|
page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(
|
||||||
|
points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))
|
||||||
|
|
||||||
return pcgts
|
return pcgts
|
||||||
|
|
||||||
|
@ -270,4 +283,3 @@ class EynollahXmlWriter():
|
||||||
coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y))
|
coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y))
|
||||||
coords = coords + ' '
|
coords = coords + ' '
|
||||||
return coords[:-1]
|
return coords[:-1]
|
||||||
|
|
||||||
|
|
|
@ -10,12 +10,14 @@ from unittest import TestCase as VanillaTestCase, skip, main as unittests_main
|
||||||
import pytest
|
import pytest
|
||||||
from ocrd_utils import disableLogging, initLogging
|
from ocrd_utils import disableLogging, initLogging
|
||||||
|
|
||||||
|
|
||||||
def main(fn=None):
|
def main(fn=None):
|
||||||
if fn:
|
if fn:
|
||||||
sys.exit(pytest.main([fn]))
|
sys.exit(pytest.main([fn]))
|
||||||
else:
|
else:
|
||||||
unittests_main()
|
unittests_main()
|
||||||
|
|
||||||
|
|
||||||
class TestCase(VanillaTestCase):
|
class TestCase(VanillaTestCase):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -26,6 +28,7 @@ class TestCase(VanillaTestCase):
|
||||||
disableLogging()
|
disableLogging()
|
||||||
initLogging()
|
initLogging()
|
||||||
|
|
||||||
|
|
||||||
class CapturingTestCase(TestCase):
|
class CapturingTestCase(TestCase):
|
||||||
"""
|
"""
|
||||||
A TestCase that needs to capture stderr/stdout and invoke click CLI.
|
A TestCase that needs to capture stderr/stdout and invoke click CLI.
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from tests.base import main
|
from tests.base import main
|
||||||
from src.eynollah.utils.counter import EynollahIdCounter
|
from src.eynollah.utils.counter import EynollahIdCounter
|
||||||
|
|
||||||
|
|
||||||
def test_counter_string():
|
def test_counter_string():
|
||||||
c = EynollahIdCounter()
|
c = EynollahIdCounter()
|
||||||
assert c.next_region_id == 'region_0001'
|
assert c.next_region_id == 'region_0001'
|
||||||
|
@ -11,6 +12,7 @@ def test_counter_string():
|
||||||
assert c.region_id(999) == 'region_0999'
|
assert c.region_id(999) == 'region_0999'
|
||||||
assert c.line_id(999, 888) == 'region_0999_line_0888'
|
assert c.line_id(999, 888) == 'region_0999_line_0888'
|
||||||
|
|
||||||
|
|
||||||
def test_counter_init():
|
def test_counter_init():
|
||||||
c = EynollahIdCounter(region_idx=2)
|
c = EynollahIdCounter(region_idx=2)
|
||||||
assert c.get('region') == 2
|
assert c.get('region') == 2
|
||||||
|
@ -19,6 +21,7 @@ def test_counter_init():
|
||||||
c.reset()
|
c.reset()
|
||||||
assert c.get('region') == 2
|
assert c.get('region') == 2
|
||||||
|
|
||||||
|
|
||||||
def test_counter_methods():
|
def test_counter_methods():
|
||||||
c = EynollahIdCounter()
|
c = EynollahIdCounter()
|
||||||
assert c.get('region') == 0
|
assert c.get('region') == 0
|
||||||
|
@ -29,5 +32,6 @@ def test_counter_methods():
|
||||||
c.inc('region', -9)
|
c.inc('region', -9)
|
||||||
assert c.get('region') == 1
|
assert c.get('region') == 1
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(__file__)
|
main(__file__)
|
||||||
|
|
|
@ -3,9 +3,11 @@ from pathlib import Path
|
||||||
from src.eynollah.utils.pil_cv2 import check_dpi
|
from src.eynollah.utils.pil_cv2 import check_dpi
|
||||||
from tests.base import main
|
from tests.base import main
|
||||||
|
|
||||||
|
|
||||||
def test_dpi():
|
def test_dpi():
|
||||||
fpath = str(Path(__file__).parent.joinpath('resources', 'kant_aufklaerung_1784_0020.tif'))
|
fpath = str(Path(__file__).parent.joinpath('resources', 'kant_aufklaerung_1784_0020.tif'))
|
||||||
assert 230 == check_dpi(cv2.imread(fpath))
|
assert 230 == check_dpi(cv2.imread(fpath))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(__file__)
|
main(__file__)
|
||||||
|
|
|
@ -8,6 +8,7 @@ testdir = Path(__file__).parent.resolve()
|
||||||
|
|
||||||
EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve()))
|
EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve()))
|
||||||
|
|
||||||
|
|
||||||
class TestEynollahRun(TestCase):
|
class TestEynollahRun(TestCase):
|
||||||
|
|
||||||
def test_full_run(self):
|
def test_full_run(self):
|
||||||
|
@ -20,5 +21,6 @@ class TestEynollahRun(TestCase):
|
||||||
print(code, out, err)
|
print(code, out, err)
|
||||||
assert not code
|
assert not code
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main(__file__)
|
main(__file__)
|
||||||
|
|
|
@ -4,11 +4,13 @@ from ocrd_models.ocrd_page import to_xml
|
||||||
|
|
||||||
PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'
|
PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'
|
||||||
|
|
||||||
|
|
||||||
def test_create_xml():
|
def test_create_xml():
|
||||||
pcgts = create_page_xml('/path/to/img.tif', 100, 100)
|
pcgts = create_page_xml('/path/to/img.tif', 100, 100)
|
||||||
xmlstr = to_xml(pcgts)
|
xmlstr = to_xml(pcgts)
|
||||||
assert 'xmlns:pc="%s"' % PAGE_2019 in xmlstr
|
assert 'xmlns:pc="%s"' % PAGE_2019 in xmlstr
|
||||||
assert 'Metadata' in xmlstr
|
assert 'Metadata' in xmlstr
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main([__file__])
|
main([__file__])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue