mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-09 12:19:54 +02:00
flow from directory
This commit is contained in:
parent
cf5ef8f5ae
commit
c606391c31
4 changed files with 767 additions and 437 deletions
|
@ -10,7 +10,6 @@ from qurator.eynollah.eynollah import Eynollah
|
||||||
"-i",
|
"-i",
|
||||||
help="image filename",
|
help="image filename",
|
||||||
type=click.Path(exists=True, dir_okay=False),
|
type=click.Path(exists=True, dir_okay=False),
|
||||||
required=True,
|
|
||||||
)
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--out",
|
"--out",
|
||||||
|
@ -19,6 +18,12 @@ from qurator.eynollah.eynollah import Eynollah
|
||||||
type=click.Path(exists=True, file_okay=False),
|
type=click.Path(exists=True, file_okay=False),
|
||||||
required=True,
|
required=True,
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--dir_in",
|
||||||
|
"-di",
|
||||||
|
help="directory of images",
|
||||||
|
type=click.Path(exists=True, file_okay=False),
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--model",
|
"--model",
|
||||||
"-m",
|
"-m",
|
||||||
|
@ -112,6 +117,7 @@ from qurator.eynollah.eynollah import Eynollah
|
||||||
def main(
|
def main(
|
||||||
image,
|
image,
|
||||||
out,
|
out,
|
||||||
|
dir_in,
|
||||||
model,
|
model,
|
||||||
save_images,
|
save_images,
|
||||||
save_layout,
|
save_layout,
|
||||||
|
@ -140,6 +146,7 @@ def main(
|
||||||
eynollah = Eynollah(
|
eynollah = Eynollah(
|
||||||
image_filename=image,
|
image_filename=image,
|
||||||
dir_out=out,
|
dir_out=out,
|
||||||
|
dir_in=dir_in,
|
||||||
dir_models=model,
|
dir_models=model,
|
||||||
dir_of_cropped_images=save_images,
|
dir_of_cropped_images=save_images,
|
||||||
dir_of_layout=save_layout,
|
dir_of_layout=save_layout,
|
||||||
|
@ -155,8 +162,9 @@ def main(
|
||||||
headers_off=headers_off,
|
headers_off=headers_off,
|
||||||
light_version=light_version,
|
light_version=light_version,
|
||||||
)
|
)
|
||||||
pcgts = eynollah.run()
|
eynollah.run()
|
||||||
eynollah.writer.write_pagexml(pcgts)
|
#pcgts = eynollah.run()
|
||||||
|
##eynollah.writer.write_pagexml(pcgts)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -797,6 +797,76 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch):
|
||||||
return layout_in_patch
|
return layout_in_patch
|
||||||
|
|
||||||
def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered):
    """Split text regions into a "main" group and a "header" group.

    Every contour in ``contours_only_text_parent`` is rasterised onto a blank
    canvas with the height and width of ``regions_model_1``.  The region is
    put into the header group when both conditions hold:

    * at least as many of its pixels have value 2 in channel 0 of
      ``regions_model_full`` as do not, and
    * its bounding extent is at least 1.3 times wider than it is tall.

    Otherwise it goes into the main group.  ``regions_model_1`` is modified
    in place: pixels equal to 1 under a header contour are relabelled to 2.

    The per-region lists ``all_box_coord``, ``all_found_texline_polygons``,
    ``slopes`` and (when not ``None``) ``contours_only_text_parent_d_ordered``
    are indexed with the same position as the contour and split the same way.

    Returns an 11-tuple:
    ``(regions_model_1, contours_main, contours_head, boxes_main, boxes_head,
    textlines_main, textlines_head, slopes_main, slopes_head,
    contours_main_d, contours_head_d)``.
    """
    (_cx, _cy, x_min_main, x_max_main, y_min_main, y_max_main,
     _y_corr) = find_new_features_of_contours(contours_only_text_parent)

    widths = x_max_main - x_min_main
    heights = y_max_main - y_min_main

    def _new_bucket():
        return {"contours": [], "contours_d": [], "boxes": [], "slopes": [], "textlines": []}

    main = _new_bucket()
    head = _new_bucket()

    for idx, contour in enumerate(contours_only_text_parent):
        canvas = np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3))
        canvas = cv2.fillPoly(canvas, pts=[contour], color=(255, 255, 255))
        inside = canvas[:, :, 0] == 255

        n_total = (inside * 1).sum()
        n_header = ((inside & (regions_model_full[:, :, 0] == 2)) * 1).sum()
        n_main = n_total - n_header

        # The aspect-ratio test is only evaluated when the pixel vote already
        # favours "header" (short-circuit `and`).
        is_header = (n_header >= n_main) and ((widths[idx] / float(heights[idx])) >= 1.3)
        bucket, label = (head, 2) if is_header else (main, 1)

        regions_model_1[:, :][(regions_model_1[:, :] == 1) & inside] = label

        bucket["contours"].append(contour)
        if contours_only_text_parent_d_ordered is not None:
            bucket["contours_d"].append(contours_only_text_parent_d_ordered[idx])
        bucket["boxes"].append(all_box_coord[idx])
        bucket["slopes"].append(slopes[idx])
        bucket["textlines"].append(all_found_texline_polygons[idx])

    return (
        regions_model_1,
        main["contours"],
        head["contours"],
        main["boxes"],
        head["boxes"],
        main["textlines"],
        head["textlines"],
        main["slopes"],
        head["slopes"],
        main["contours_d"],
        head["contours_d"],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered):
|
||||||
|
|
||||||
|
### to make it faster
|
||||||
|
h_o = regions_model_1.shape[0]
|
||||||
|
w_o = regions_model_1.shape[1]
|
||||||
|
|
||||||
|
regions_model_1 = cv2.resize(regions_model_1, (int(regions_model_1.shape[1]/3.), int(regions_model_1.shape[0]/3.)), interpolation=cv2.INTER_NEAREST)
|
||||||
|
regions_model_full = cv2.resize(regions_model_full, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST)
|
||||||
|
contours_only_text_parent = [ (i/3.).astype(np.int32) for i in contours_only_text_parent]
|
||||||
|
|
||||||
|
###
|
||||||
|
|
||||||
cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent)
|
cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent)
|
||||||
|
|
||||||
length_con=x_max_main-x_min_main
|
length_con=x_max_main-x_min_main
|
||||||
|
@ -853,8 +923,14 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#plt.imshow(img[:,:,0])
|
### to make it faster
|
||||||
#plt.show()
|
|
||||||
|
regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST)
|
||||||
|
#regions_model_full = cv2.resize(img, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST)
|
||||||
|
contours_only_text_parent_head = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_head]
|
||||||
|
contours_only_text_parent_main = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_main]
|
||||||
|
###
|
||||||
|
|
||||||
return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d
|
return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d
|
||||||
|
|
||||||
def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col):
|
def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col):
|
||||||
|
|
|
@ -3,7 +3,8 @@ import numpy as np
|
||||||
from shapely import geometry
|
from shapely import geometry
|
||||||
|
|
||||||
from .rotate import rotate_image, rotation_image_new
|
from .rotate import rotate_image, rotation_image_new
|
||||||
|
from multiprocessing import Process, Queue, cpu_count
|
||||||
|
from multiprocessing import Pool
|
||||||
def contours_in_same_horizon(cy_main_hor):
|
def contours_in_same_horizon(cy_main_hor):
|
||||||
X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
||||||
X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
|
||||||
|
@ -147,6 +148,96 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
|
||||||
|
|
||||||
return contours_imgs
|
return contours_imgs
|
||||||
|
|
||||||
|
def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first):
    """Worker: rotate a chunk of contours and report them through a queue.

    Each contour in ``contours_per_process`` is drawn filled onto a blank
    array shaped like ``img``, rotated by ``-slope_first`` with
    ``rotation_image_new``, and traced again with ``cv2.findContours``.  The
    first contour found is shifted by the absolute size difference between
    the rotated array and ``img`` on each axis.

    One item is put on ``queue_of_all_params``: a two-element list
    ``[contours, indexes]`` where ``indexes`` are the matching entries of
    ``indexes_r_con_per_pro``.  Nothing is returned.
    """
    restored_contours = []
    handled_indexes = []

    for position, contour in enumerate(contours_per_process):
        handled_indexes.append(indexes_r_con_per_pro[position])

        mask = cv2.fillPoly(np.zeros(img.shape), pts=[contour], color=(1, 1, 1))
        mask = rotation_image_new(mask, -slope_first).astype(np.uint8)

        gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, 0)
        found, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        first = found[0]
        shift_x = np.abs(mask.shape[1] - img.shape[1])
        shift_y = np.abs(mask.shape[0] - img.shape[0])
        first[:, 0, 0] = first[:, 0, 0] + shift_x
        first[:, 0, 1] = first[:, 0, 1] + shift_y

        restored_contours.append(first)

    queue_of_all_params.put([restored_contours, handled_indexes])
|
||||||
|
|
||||||
|
|
||||||
|
def get_textregion_contours_in_org_image_multi(cnts, img, slope_first):
    """Process contours in parallel worker processes, preserving input order.

    ``cnts`` is cut into up to ``cpu_count()`` contiguous chunks; each
    non-empty chunk is handed to ``do_work_of_contours_in_image`` in its own
    ``Process`` together with the positions of its contours in ``cnts``.

    Workers report through a shared queue in whatever order they finish, so
    the collected contours are re-sorted by their original position before
    being returned.  The result therefore lines up element-by-element with
    ``cnts``.

    Args:
        cnts: sequence of contours.
        img: image passed through to the worker.
        slope_first: angle passed through to the worker.

    Returns:
        List with one processed contour per input contour, in input order.
        An empty input yields an empty list without starting any process.
    """
    # len() rather than truthiness: cnts may not be a plain list.
    if len(cnts) == 0:
        return []

    num_cores = cpu_count()
    queue_of_all_params = Queue()

    bounds = np.linspace(0, len(cnts), num_cores + 1)
    indexes_by_text_con = np.arange(len(cnts))

    processes = []
    for i in range(num_cores):
        start, stop = int(bounds[i]), int(bounds[i + 1])
        if start == stop:
            # Fewer contours than cores: nothing to do for this slot.
            continue
        processes.append(
            Process(
                target=do_work_of_contours_in_image,
                args=(queue_of_all_params, cnts[start:stop], indexes_by_text_con[start:stop], img, slope_first),
            )
        )

    for process in processes:
        process.start()

    # Drain the queue before joining so workers are never blocked on put().
    indexed_contours = []
    for _ in processes:
        contours_for_sub_process, indexes_for_sub_process = queue_of_all_params.get(True)
        indexed_contours.extend(zip(indexes_for_sub_process, contours_for_sub_process))

    for process in processes:
        process.join()

    # Sort on the index only; the contours themselves are arrays and must not
    # be compared.
    indexed_contours.sort(key=lambda pair: pair[0])
    return [contour for _, contour in indexed_contours]
|
||||||
|
def loop_contour_image(index_l, cnts,img, slope_first):
    """Rotate the contour ``cnts[index_l]`` and return the re-traced contour.

    The contour is drawn filled onto a blank array shaped like ``img``,
    rotated by ``-slope_first`` with ``rotation_image_new``, thresholded and
    traced again with ``cv2.findContours``.  The first contour found is
    shifted on each axis by the absolute size difference between the rotated
    array and ``img``, then returned.
    """
    canvas = cv2.fillPoly(np.zeros(img.shape), pts=[cnts[index_l]], color=(1, 1, 1))
    rotated = rotation_image_new(canvas, -slope_first).astype(np.uint8)

    gray = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, 0)
    found, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contour = found[0]
    shift_x = np.abs(rotated.shape[1] - img.shape[1])
    shift_y = np.abs(rotated.shape[0] - img.shape[0])
    contour[:, 0, 0] = contour[:, 0, 0] + shift_x
    contour[:, 0, 1] = contour[:, 0, 1] + shift_y
    return contour
|
||||||
|
|
||||||
|
def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first):
    """Process every contour with ``loop_contour_image`` in a process pool.

    One task per contour is submitted to a ``Pool`` of ``cpu_count()``
    workers via ``starmap``, which returns results in task order, so the
    output lines up element-by-element with ``cnts``.

    Args:
        cnts: sequence of contours.
        img: image passed through to ``loop_contour_image``.
        slope_first: angle passed through to ``loop_contour_image``.

    Returns:
        List with one processed contour per input contour.  An empty input
        yields an empty list without starting a pool.
    """
    # len() rather than truthiness: cnts may not be a plain list.
    if len(cnts) == 0:
        return []

    tasks = [(index_l, cnts, img, slope_first) for index_l in range(len(cnts))]
    with Pool(cpu_count()) as p:
        return p.starmap(loop_contour_image, tasks)
|
||||||
|
|
||||||
def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
||||||
|
|
||||||
cnts_org = []
|
cnts_org = []
|
||||||
|
@ -175,11 +266,43 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
|
||||||
# print(np.shape(cont_int[0]))
|
# print(np.shape(cont_int[0]))
|
||||||
cnts_org.append(cont_int[0])
|
cnts_org.append(cont_int[0])
|
||||||
|
|
||||||
# print(cnts_org,'cnts_org')
|
return cnts_org
|
||||||
|
|
||||||
|
def get_textregion_contours_in_org_image_light(cnts, img, slope_first):
    """Rotate contours at one third of the resolution, then scale back up.

    ``img`` is resized to a third of its width and height (nearest
    neighbour) and every contour is divided by 3 and cast to ``int32``.  Each
    scaled contour is drawn filled onto a blank array shaped like the
    reduced image, rotated by ``-slope_first`` with ``rotation_image_new``,
    thresholded and traced again with ``cv2.findContours``.  The first
    contour found is shifted by the absolute size difference between the
    rotated array and the reduced image, and finally multiplied by 3.

    Returns the list of resulting contours, one per input contour.
    """
    small = cv2.resize(img, (int(img.shape[1]/3.), int(img.shape[0]/3.)), interpolation=cv2.INTER_NEAREST)
    scaled_contours = [(contour / 3).astype(np.int32) for contour in cnts]

    restored = []
    for contour in scaled_contours:
        canvas = cv2.fillPoly(np.zeros(small.shape), pts=[contour], color=(1, 1, 1))
        rotated = rotation_image_new(canvas, -slope_first).astype(np.uint8)

        gray = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, 0)
        found, _ = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        first = found[0]
        shift_x = np.abs(rotated.shape[1] - small.shape[1])
        shift_y = np.abs(rotated.shape[0] - small.shape[0])
        first[:, 0, 0] = first[:, 0, 0] + shift_x
        first[:, 0, 1] = first[:, 0, 1] + shift_y

        # Undo the 1/3 down-scaling applied to the input contours.
        restored.append(first * 3)

    return restored
|
||||||
|
|
||||||
def return_contours_of_interested_textline(region_pre_p, pixel):
|
def return_contours_of_interested_textline(region_pre_p, pixel):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue