This commit is contained in:
Robert Sachunsky 2025-11-16 15:36:06 +00:00 committed by GitHub
commit 850221d9ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 1086 additions and 1414 deletions

View file

@ -79,18 +79,28 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
type=click.Path(file_okay=True, dir_okay=True), type=click.Path(file_okay=True, dir_okay=True),
required=True, required=True,
) )
@click.option(
"--overwrite",
"-O",
help="overwrite (instead of skipping) if output xml exists",
is_flag=True,
)
@click.option( @click.option(
"--log_level", "--log_level",
"-l", "-l",
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this", help="Override log level globally to this",
) )
def binarization(patches, model_dir, input_image, dir_in, output, log_level): def binarization(patches, model_dir, input_image, dir_in, output, overwrite, log_level):
assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both." assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
binarizer = SbbBinarizer(model_dir) binarizer = SbbBinarizer(model_dir)
if log_level: if log_level:
binarizer.log.setLevel(getLevelName(log_level)) binarizer.logger.setLevel(getLevelName(log_level))
binarizer.run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in) binarizer.run(overwrite=overwrite,
use_patches=patches,
image_path=input_image,
output=output,
dir_in=dir_in)
@main.command() @main.command()

View file

@ -88,12 +88,7 @@ from .utils.contour import (
join_polygons, join_polygons,
make_intersection, make_intersection,
) )
from .utils.rotate import ( from .utils.rotate import rotate_image
rotate_image,
rotation_not_90_func,
rotation_not_90_func_full_layout,
rotation_image_new
)
from .utils.utils_ocr import ( from .utils.utils_ocr import (
return_start_and_end_of_common_text_of_textline_ocr_without_common_section, return_start_and_end_of_common_text_of_textline_ocr_without_common_section,
return_textline_contour_with_added_box_coordinate, return_textline_contour_with_added_box_coordinate,
@ -139,7 +134,6 @@ from .utils import (
return_boxes_of_images_by_order_of_reading_new return_boxes_of_images_by_order_of_reading_new
) )
from .utils.pil_cv2 import check_dpi, pil2cv from .utils.pil_cv2 import check_dpi, pil2cv
from .utils.xml import order_and_id_of_texts
from .plot import EynollahPlotter from .plot import EynollahPlotter
from .writer import EynollahXmlWriter from .writer import EynollahXmlWriter
@ -2091,19 +2085,19 @@ class Eynollah:
prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] prediction_regions_org = prediction_regions_org[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org=prediction_regions_org[:,:,0]
mask_lines_only = (prediction_regions_org[:,:] ==3)*1 mask_seps_only = (prediction_regions_org[:,:] == 3)*1
mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_texts_only = (prediction_regions_org[:,:] ==1)*1
mask_images_only=(prediction_regions_org[:,:] ==2)*1 mask_images_only=(prediction_regions_org[:,:] ==2)*1
polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) polygons_seplines, hir_seplines = return_contours_of_image(mask_seps_only)
polygons_seplines = filter_contours_area_of_image( polygons_seplines = filter_contours_area_of_image(
mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) mask_seps_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001)
text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = np.zeros(prediction_regions_org.shape)
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_seps, color=(3,3,3))
text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1)) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1,1,1))
@ -2282,7 +2276,7 @@ class Eynollah:
img_bin = resize_image(img_bin, img_height_h, img_width_h ) img_bin = resize_image(img_bin, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org=prediction_regions_org[:,:,0]
mask_lines_only = (prediction_regions_org[:,:] ==3)*1 mask_seps_only = (prediction_regions_org[:,:] == 3)*1
mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_texts_only = (prediction_regions_org[:,:] ==1)*1
mask_texts_only = mask_texts_only.astype('uint8') mask_texts_only = mask_texts_only.astype('uint8')
@ -2293,7 +2287,7 @@ class Eynollah:
mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1)
mask_images_only=(prediction_regions_org[:,:] ==2)*1 mask_images_only=(prediction_regions_org[:,:] ==2)*1
polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) polygons_seplines, hir_seplines = return_contours_of_image(mask_seps_only)
test_khat = np.zeros(prediction_regions_org.shape) test_khat = np.zeros(prediction_regions_org.shape)
test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1)) test_khat = cv2.fillPoly(test_khat, pts=polygons_seplines, color=(1,1,1))
@ -2307,7 +2301,7 @@ class Eynollah:
#plt.show() #plt.show()
polygons_seplines = filter_contours_area_of_image( polygons_seplines = filter_contours_area_of_image(
mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) mask_seps_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
test_khat = np.zeros(prediction_regions_org.shape) test_khat = np.zeros(prediction_regions_org.shape)
test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1)) test_khat = cv2.fillPoly(test_khat, pts = polygons_seplines, color=(1,1,1))
@ -2318,10 +2312,10 @@ class Eynollah:
polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts) ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts)
polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001)
text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = np.zeros(prediction_regions_org.shape)
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3,3,3)) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_seps, color=(3,3,3))
text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
@ -2377,7 +2371,7 @@ class Eynollah:
prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h )
mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) mask_zeros2 = (prediction_regions_org2[:,:,0] == 0)
mask_lines2 = (prediction_regions_org2[:,:,0] == 3) mask_seps2 = (prediction_regions_org2[:,:,0] == 3)
text_sume_early = (prediction_regions_org[:,:] == 1).sum() text_sume_early = (prediction_regions_org[:,:] == 1).sum()
prediction_regions_org_copy = np.copy(prediction_regions_org) prediction_regions_org_copy = np.copy(prediction_regions_org)
prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0 prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0
@ -2388,8 +2382,8 @@ class Eynollah:
if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD):
prediction_regions_org = np.copy(prediction_regions_org_copy) prediction_regions_org = np.copy(prediction_regions_org_copy)
prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 prediction_regions_org[(mask_seps2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3
mask_lines_only=(prediction_regions_org[:,:]==3)*1 mask_seps_only=(prediction_regions_org[:,:]==3)*1
prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2) prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2)
prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2)
@ -2411,20 +2405,20 @@ class Eynollah:
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org=prediction_regions_org[:,:,0]
mask_lines_only=(prediction_regions_org[:,:]==3)*1 mask_seps_only=(prediction_regions_org[:,:]==3)*1
mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_texts_only=(prediction_regions_org[:,:]==1)*1
mask_images_only=(prediction_regions_org[:,:]==2)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1
polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) polygons_seplines, hir_seplines = return_contours_of_image(mask_seps_only)
polygons_seplines = filter_contours_area_of_image( polygons_seplines = filter_contours_area_of_image(
mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) mask_seps_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001)
polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only, 1, 0.00001)
text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = np.zeros(prediction_regions_org.shape)
text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_lines, color=(3, 3, 3)) text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_seps, color=(3, 3, 3))
text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
@ -2449,7 +2443,7 @@ class Eynollah:
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org=prediction_regions_org[:,:,0]
#mask_lines_only=(prediction_regions_org[:,:]==3)*1 #mask_seps_only=(prediction_regions_org[:,:]==3)*1
#img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
#prediction_regions_org = self.do_prediction(True, img, self.models["region"]) #prediction_regions_org = self.do_prediction(True, img, self.models["region"])
@ -2457,19 +2451,19 @@ class Eynollah:
#prediction_regions_org = prediction_regions_org[:,:,0] #prediction_regions_org = prediction_regions_org[:,:,0]
#prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
mask_lines_only = (prediction_regions_org == 3)*1 mask_seps_only = (prediction_regions_org == 3)*1
mask_texts_only = (prediction_regions_org == 1)*1 mask_texts_only = (prediction_regions_org == 1)*1
mask_images_only= (prediction_regions_org == 2)*1 mask_images_only= (prediction_regions_org == 2)*1
polygons_seplines, hir_seplines = return_contours_of_image(mask_lines_only) polygons_seplines, hir_seplines = return_contours_of_image(mask_seps_only)
polygons_seplines = filter_contours_area_of_image( polygons_seplines = filter_contours_area_of_image(
mask_lines_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1) mask_seps_only, polygons_seplines, hir_seplines, max_area=1, min_area=0.00001, dilate=1)
polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001)
text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = np.zeros(prediction_regions_org.shape)
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_seps, color=(3,3,3))
text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2
text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
@ -2491,11 +2485,15 @@ class Eynollah:
contours_only_text_parent) contours_only_text_parent)
cx_head, cy_head, mx_head, Mx_head, my_head, My_head, mxy_head = find_new_features_of_contours( cx_head, cy_head, mx_head, Mx_head, my_head, My_head, mxy_head = find_new_features_of_contours(
contours_only_text_parent_h) contours_only_text_parent_h)
cx_main = np.array(cx_main, dtype=int)
cy_main = np.array(cy_main, dtype=int)
cx_head = np.array(cx_head, dtype=int)
cy_head = np.array(cy_head, dtype=int)
def match_boxes(only_centers: bool): def match_boxes(only_centers: bool):
arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int) arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int)
for ii in range(len(contours_only_text_parent)): for ii in range(len(contours_only_text_parent)):
check_if_textregion_located_in_a_box = False box_found = False
for jj, box in enumerate(boxes): for jj, box in enumerate(boxes):
if ((cx_main[ii] >= box[0] and if ((cx_main[ii] >= box[0] and
cx_main[ii] < box[1] and cx_main[ii] < box[1] and
@ -2506,20 +2504,23 @@ class Eynollah:
my_main[ii] >= box[2] and my_main[ii] >= box[2] and
My_main[ii] < box[3])): My_main[ii] < box[3])):
arg_text_con_main[ii] = jj arg_text_con_main[ii] = jj
check_if_textregion_located_in_a_box = True box_found = True
# print("main/matched ", ii, "\t", (mx_main[ii], Mx_main[ii], my_main[ii], My_main[ii]), "\tin", jj, box, only_centers)
break break
if not check_if_textregion_located_in_a_box: if not box_found:
dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_main[ii]], [cx_main[ii]]]), axis=0) dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_main[ii]], [cx_main[ii]]]), axis=0)
pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) & pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) &
(boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1])) (boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1]))
assert pcontained_in_box.any(), (ii, cx_main[ii], cy_main[ii])
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box)) ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
arg_text_con_main[ii] = ind_min arg_text_con_main[ii] = ind_min
# print("main/fallback ", ii, "\t", (mx_main[ii], Mx_main[ii], my_main[ii], My_main[ii]), "\tin", ind_min, boxes[ind_min], only_centers)
args_contours_main = np.arange(len(contours_only_text_parent)) args_contours_main = np.arange(len(contours_only_text_parent))
order_by_con_main = np.zeros_like(arg_text_con_main) order_by_con_main = np.zeros_like(arg_text_con_main)
arg_text_con_head = np.zeros(len(contours_only_text_parent_h), dtype=int) arg_text_con_head = np.zeros(len(contours_only_text_parent_h), dtype=int)
for ii in range(len(contours_only_text_parent_h)): for ii in range(len(contours_only_text_parent_h)):
check_if_textregion_located_in_a_box = False box_found = False
for jj, box in enumerate(boxes): for jj, box in enumerate(boxes):
if ((cx_head[ii] >= box[0] and if ((cx_head[ii] >= box[0] and
cx_head[ii] < box[1] and cx_head[ii] < box[1] and
@ -2530,20 +2531,21 @@ class Eynollah:
my_head[ii] >= box[2] and my_head[ii] >= box[2] and
My_head[ii] < box[3])): My_head[ii] < box[3])):
arg_text_con_head[ii] = jj arg_text_con_head[ii] = jj
check_if_textregion_located_in_a_box = True box_found = True
# print("head/matched ", ii, "\t", (mx_head[ii], Mx_head[ii], my_head[ii], My_head[ii]), "\tin", jj, box, only_centers)
break break
if not check_if_textregion_located_in_a_box: if not box_found:
dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_head[ii]], [cx_head[ii]]]), axis=0) dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_head[ii]], [cx_head[ii]]]), axis=0)
pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) & pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) &
(boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1])) (boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1]))
assert pcontained_in_box.any(), (ii, cx_head[ii], cy_head[ii])
ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box)) ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box))
arg_text_con_head[ii] = ind_min arg_text_con_head[ii] = ind_min
# print("head/fallback ", ii, "\t", (mx_head[ii], Mx_head[ii], my_head[ii], My_head[ii]), "\tin", ind_min, boxes[ind_min], only_centers)
args_contours_head = np.arange(len(contours_only_text_parent_h)) args_contours_head = np.arange(len(contours_only_text_parent_h))
order_by_con_head = np.zeros_like(arg_text_con_head) order_by_con_head = np.zeros_like(arg_text_con_head)
ref_point = 0 idx = 0
order_of_texts_tot = []
id_of_texts_tot = []
for iij, box in enumerate(boxes): for iij, box in enumerate(boxes):
ys = slice(*box[2:4]) ys = slice(*box[2:4])
xs = slice(*box[0:2]) xs = slice(*box[0:2])
@ -2552,42 +2554,30 @@ class Eynollah:
con_inter_box = contours_only_text_parent[args_contours_box_main] con_inter_box = contours_only_text_parent[args_contours_box_main]
con_inter_box_h = contours_only_text_parent_h[args_contours_box_head] con_inter_box_h = contours_only_text_parent_h[args_contours_box_head]
indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( _, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(
textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2]) textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2], box[0])
order_of_texts, id_of_texts = order_and_id_of_texts( for tidx, kind in zip(index_by_kind_sorted, kind_of_texts_sorted):
con_inter_box, con_inter_box_h, if kind == 1:
indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) # print(iij, "main", args_contours_box_main[tidx], "becomes", idx)
order_by_con_main[args_contours_box_main[tidx]] = idx
else:
# print(iij, "head", args_contours_box_head[tidx], "becomes", idx)
order_by_con_head[args_contours_box_head[tidx]] = idx
idx += 1
indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1] # xml writer will create region ids in order of
indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1] # - contours_only_text_parent (main text), followed by
indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2] # - contours_only_text_parent (headings),
indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2] # and then create regionrefs into these ordered by order_text_new
order_text_new = np.argsort(np.concatenate((order_by_con_main,
for zahler, _ in enumerate(args_contours_box_main): order_by_con_head)))
arg_order_v = indexes_sorted_main[zahler] return order_text_new
order_by_con_main[args_contours_box_main[indexes_by_type_main[zahler]]] = \
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
for zahler, _ in enumerate(args_contours_box_head):
arg_order_v = indexes_sorted_head[zahler]
order_by_con_head[args_contours_box_head[indexes_by_type_head[zahler]]] = \
np.flatnonzero(indexes_sorted == arg_order_v) + ref_point
for jji in range(len(id_of_texts)):
order_of_texts_tot.append(order_of_texts[jji] + ref_point)
id_of_texts_tot.append(id_of_texts[jji])
ref_point += len(id_of_texts)
order_of_texts_tot = np.concatenate((order_by_con_main,
order_by_con_head))
order_text_new = np.argsort(order_of_texts_tot)
return order_text_new, id_of_texts_tot
try: try:
results = match_boxes(False) results = match_boxes(False)
except Exception as why: except Exception as why:
self.logger.error(why) self.logger.exception(why)
results = match_boxes(True) results = match_boxes(True)
self.logger.debug("exit do_order_of_regions") self.logger.debug("exit do_order_of_regions")
@ -2665,45 +2655,35 @@ class Eynollah:
return layout_org, contours_new return layout_org, contours_new
def delete_separator_around(self, splitter_y, peaks_neg, image_by_region, label_seps, label_table):
    """Erase separator and table labels in vertical bands around column boundaries.

    For every horizontal section ``i`` delimited by ``splitter_y[i]`` and
    ``splitter_y[i+1]``, and for every *interior* entry of ``peaks_neg[i]``
    (the first and the last entry are skipped), all pixels within
    ``pix_del`` (100) columns left and right of that x position which carry
    ``label_seps`` or ``label_table`` are reset to 0.

    Args:
        splitter_y: sequence of y coordinates delimiting the sections.
        peaks_neg: per-section sequences of x positions.
        image_by_region: 2-D label array, or 3-D array whose channels are
            all treated element-wise; modified in place.
        label_seps: label value of separator pixels to erase.
        label_table: label value of table pixels to erase.

    Returns:
        The same ``image_by_region`` array (modified in place).
    """
    # format of subboxes: box=[x1, x2 , y1, y2]
    pix_del = 100
    for i in range(len(splitter_y) - 1):
        # slice bounds must be true integers (inputs may be numpy floats)
        y_start = int(splitter_y[i])
        y_stop = int(splitter_y[i + 1])
        for j in range(1, len(peaks_neg[i]) - 1):
            peak = int(peaks_neg[i][j])
            # clamp at the left border: a negative start would wrap around
            # to the right side of the image and select the wrong (or an
            # empty) band
            x_start = max(0, peak - pix_del)
            x_stop = peak + pix_del
            # basic slicing yields a view, so the masked assignment below
            # writes through to image_by_region (2-D and 3-D alike)
            band = image_by_region[y_start:y_stop, x_start:x_stop]
            band[(band == label_seps) | (band == label_table)] = 0
    return image_by_region
def add_tables_heuristic_to_layout( def add_tables_heuristic_to_layout(
self, image_regions_eraly_p, boxes, self, image_regions_eraly_p, boxes,
slope_mean_hor, spliter_y, peaks_neg_tot, image_revised, slope_mean_hor, splitter_y, peaks_neg_tot, image_revised,
num_col_classifier, min_area, pixel_line): num_col_classifier, min_area, label_seps):
pixel_table =10 label_table =10
image_revised_1 = self.delete_separator_around(spliter_y, peaks_neg_tot, image_revised, pixel_line, pixel_table) image_revised_1 = self.delete_separator_around(splitter_y, peaks_neg_tot, image_revised, label_seps, label_table)
try: try:
image_revised_1[:,:30][image_revised_1[:,:30]==pixel_line] = 0 image_revised_1[:,:30][image_revised_1[:,:30]==label_seps] = 0
image_revised_1[:,-30:][image_revised_1[:,-30:]==pixel_line] = 0 image_revised_1[:,-30:][image_revised_1[:,-30:]==label_seps] = 0
except: except:
pass pass
boxes = np.array(boxes, dtype=int) # to be on the safe side boxes = np.array(boxes, dtype=int) # to be on the safe side
@ -2714,7 +2694,7 @@ class Eynollah:
_, thresh = cv2.threshold(image_col, 0, 255, 0) _, thresh = cv2.threshold(image_col, 0, 255, 0)
contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
if indiv==pixel_table: if indiv==label_table:
main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy,
max_area=1, min_area=0.001) max_area=1, min_area=0.001)
else: else:
@ -2730,11 +2710,11 @@ class Eynollah:
box_xs = slice(*boxes[i][0:2]) box_xs = slice(*boxes[i][0:2])
image_box = img_comm[box_ys, box_xs] image_box = img_comm[box_ys, box_xs]
try: try:
image_box_tabels_1 = (image_box == pixel_table) * 1 image_box_tabels_1 = (image_box == label_table) * 1
contours_tab,_=return_contours_of_image(image_box_tabels_1) contours_tab,_=return_contours_of_image(image_box_tabels_1)
contours_tab=filter_contours_area_of_image_tables(image_box_tabels_1,contours_tab,_,1,0.003) contours_tab=filter_contours_area_of_image_tables(image_box_tabels_1,contours_tab,_,1,0.003)
image_box_tabels_1 = (image_box == pixel_line).astype(np.uint8) * 1 image_box_tabels_1 = (image_box == label_seps).astype(np.uint8) * 1
image_box_tabels_and_m_text = ( (image_box == pixel_table) | image_box_tabels_and_m_text = ( (image_box == label_table) |
(image_box == 1) ).astype(np.uint8) * 1 (image_box == 1) ).astype(np.uint8) * 1
image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5) image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5)
@ -2796,7 +2776,7 @@ class Eynollah:
y_up_tabs=[] y_up_tabs=[]
for ii in range(len(y_up_tabs)): for ii in range(len(y_up_tabs)):
image_box[y_up_tabs[ii]:y_down_tabs[ii]] = pixel_table image_box[y_up_tabs[ii]:y_down_tabs[ii]] = label_table
image_revised_last[box_ys, box_xs] = image_box image_revised_last[box_ys, box_xs] = image_box
else: else:
@ -2807,14 +2787,14 @@ class Eynollah:
image_revised_last[box_ys, box_xs] = image_box image_revised_last[box_ys, box_xs] = image_box
if num_col_classifier==1: if num_col_classifier==1:
img_tables_col_1 = (image_revised_last == pixel_table).astype(np.uint8) img_tables_col_1 = (image_revised_last == label_table).astype(np.uint8)
contours_table_col1, _ = return_contours_of_image(img_tables_col_1) contours_table_col1, _ = return_contours_of_image(img_tables_col_1)
_,_ ,_ , _, y_min_tab_col1 ,y_max_tab_col1, _= find_new_features_of_contours(contours_table_col1) _,_ ,_ , _, y_min_tab_col1 ,y_max_tab_col1, _= find_new_features_of_contours(contours_table_col1)
if len(y_min_tab_col1)>0: if len(y_min_tab_col1)>0:
for ijv in range(len(y_min_tab_col1)): for ijv in range(len(y_min_tab_col1)):
image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv])] = pixel_table image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv])] = label_table
return image_revised_last return image_revised_last
def get_tables_from_model(self, img, num_col_classifier): def get_tables_from_model(self, img, num_col_classifier):
@ -2952,8 +2932,8 @@ class Eynollah:
mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = (text_regions_p_1[:, :] == 2) * 1
mask_images = mask_images.astype(np.uint8) mask_images = mask_images.astype(np.uint8)
mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10)
mask_lines = (text_regions_p_1[:, :] == 3) * 1 mask_seps = (text_regions_p_1[:, :] == 3) * 1
mask_lines = mask_lines.astype(np.uint8) mask_seps = mask_seps.astype(np.uint8)
img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)
@ -2976,10 +2956,10 @@ class Eynollah:
max(self.num_col_lower or num_col_classifier, max(self.num_col_lower or num_col_classifier,
num_col_classifier)) num_col_classifier))
except Exception as why: except Exception as why:
self.logger.error(why) self.logger.exception(why)
num_col = None num_col = None
#print("inside graphics 3 ", time.time() - t_in_gr) #print("inside graphics 3 ", time.time() - t_in_gr)
return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps,
text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light) text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light)
def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light): def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light):
@ -3029,8 +3009,8 @@ class Eynollah:
mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = (text_regions_p_1[:, :] == 2) * 1
mask_images = mask_images.astype(np.uint8) mask_images = mask_images.astype(np.uint8)
mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10)
mask_lines = (text_regions_p_1[:, :] == 3) * 1 mask_seps = (text_regions_p_1[:, :] == 3) * 1
mask_lines = mask_lines.astype(np.uint8) mask_seps = mask_seps.astype(np.uint8)
img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)
@ -3044,9 +3024,9 @@ class Eynollah:
if not num_column_is_classified: if not num_column_is_classified:
num_col_classifier = num_col + 1 num_col_classifier = num_col + 1
except Exception as why: except Exception as why:
self.logger.error(why) self.logger.exception(why)
num_col = None num_col = None
return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, return (num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps,
text_regions_p_1, cont_page, table_prediction) text_regions_p_1, cont_page, table_prediction)
def run_enhancement(self, light_version): def run_enhancement(self, light_version):
@ -3101,13 +3081,13 @@ class Eynollah:
return slope_deskew return slope_deskew
def run_marginals( def run_marginals(
self, textline_mask_tot_ea, mask_images, mask_lines, self, textline_mask_tot_ea, mask_images, mask_seps,
num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): num_col_classifier, slope_deskew, text_regions_p_1, table_prediction):
textline_mask_tot = textline_mask_tot_ea[:, :] textline_mask_tot = textline_mask_tot_ea[:, :]
textline_mask_tot[mask_images[:, :] == 1] = 0 textline_mask_tot[mask_images[:, :] == 1] = 0
text_regions_p_1[mask_lines[:, :] == 1] = 3 text_regions_p_1[mask_seps[:, :] == 1] = 3
text_regions_p = text_regions_p_1[:, :] text_regions_p = text_regions_p_1[:, :]
text_regions_p = np.array(text_regions_p) text_regions_p = np.array(text_regions_p)
if num_col_classifier in (1, 2): if num_col_classifier in (1, 2):
@ -3131,12 +3111,10 @@ class Eynollah:
self.logger.debug('enter run_boxes_no_full_layout') self.logger.debug('enter run_boxes_no_full_layout')
t_0_box = time.time() t_0_box = time.time()
if np.abs(slope_deskew) >= SLOPE_THRESHOLD: if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
_, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func( textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) table_prediction_n = rotate_image(table_prediction, slope_deskew)
textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_d[:, :] == 1) * 1
table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1])
regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1
if self.tables: if self.tables:
regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
regions_without_separators = (text_regions_p[:, :] == 1) * 1 regions_without_separators = (text_regions_p[:, :] == 1) * 1
@ -3146,17 +3124,17 @@ class Eynollah:
if self.tables: if self.tables:
regions_without_separators[table_prediction ==1 ] = 1 regions_without_separators[table_prediction ==1 ] = 1
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
text_regions_p_1_n = None text_regions_p_d = None
textline_mask_tot_d = None textline_mask_tot_d = None
regions_without_separators_d = None regions_without_separators_d = None
pixel_lines = 3 label_seps = 3
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
_, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( _, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
text_regions_p, num_col_classifier, self.tables, pixel_lines) text_regions_p, num_col_classifier, self.tables, label_seps)
if np.abs(slope_deskew) >= SLOPE_THRESHOLD: if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( _, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
text_regions_p_1_n, num_col_classifier, self.tables, pixel_lines) text_regions_p_d, num_col_classifier, self.tables, label_seps)
#print(time.time()-t_0_box,'time box in 2') #print(time.time()-t_0_box,'time box in 2')
self.logger.info("num_col_classifier: %s", num_col_classifier) self.logger.info("num_col_classifier: %s", num_col_classifier)
@ -3171,7 +3149,7 @@ class Eynollah:
t1 = time.time() t1 = time.time()
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new, regions_without_separators, matrix_of_lines_ch, splitter_y_new, regions_without_separators, text_regions_p, matrix_of_seps_ch,
num_col_classifier, erosion_hurts, self.tables, self.right2left) num_col_classifier, erosion_hurts, self.tables, self.right2left)
boxes_d = None boxes_d = None
self.logger.debug("len(boxes): %s", len(boxes)) self.logger.debug("len(boxes): %s", len(boxes))
@ -3183,17 +3161,17 @@ class Eynollah:
else: else:
text_regions_p_tables = np.copy(text_regions_p) text_regions_p_tables = np.copy(text_regions_p)
text_regions_p_tables[(table_prediction == 1)] = 10 text_regions_p_tables[(table_prediction == 1)] = 10
pixel_line = 3 label_seps = 3
img_revised_tab2 = self.add_tables_heuristic_to_layout( img_revised_tab2 = self.add_tables_heuristic_to_layout(
text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables,
num_col_classifier , 0.000005, pixel_line) num_col_classifier , 0.000005, label_seps)
#print(time.time()-t_0_box,'time box in 3.2') #print(time.time()-t_0_box,'time box in 3.2')
img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(
img_revised_tab2, table_prediction, 10, num_col_classifier) img_revised_tab2, table_prediction, 10, num_col_classifier)
#print(time.time()-t_0_box,'time box in 3.3') #print(time.time()-t_0_box,'time box in 3.3')
else: else:
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, splitter_y_new_d, regions_without_separators_d, text_regions_p_d, matrix_of_seps_ch_d,
num_col_classifier, erosion_hurts, self.tables, self.right2left) num_col_classifier, erosion_hurts, self.tables, self.right2left)
boxes = None boxes = None
self.logger.debug("len(boxes): %s", len(boxes_d)) self.logger.debug("len(boxes): %s", len(boxes_d))
@ -3202,15 +3180,15 @@ class Eynollah:
if self.light_version: if self.light_version:
pass pass
else: else:
text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.copy(text_regions_p_d)
text_regions_p_tables = np.round(text_regions_p_tables) text_regions_p_tables = np.round(text_regions_p_tables)
text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10 text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10
pixel_line = 3 label_seps = 3
img_revised_tab2 = self.add_tables_heuristic_to_layout( img_revised_tab2 = self.add_tables_heuristic_to_layout(
text_regions_p_tables, boxes_d, 0, splitter_y_new_d, text_regions_p_tables, boxes_d, 0, splitter_y_new_d,
peaks_neg_tot_tables_d, text_regions_p_tables, peaks_neg_tot_tables_d, text_regions_p_tables,
num_col_classifier, 0.000005, pixel_line) num_col_classifier, 0.000005, label_seps)
img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(
img_revised_tab2, table_prediction_n, 10, num_col_classifier) img_revised_tab2, table_prediction_n, 10, num_col_classifier)
@ -3245,22 +3223,22 @@ class Eynollah:
else: else:
polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2)
pixel_img = 4 label_marginalia = 4
min_area_mar = 0.00001 min_area_mar = 0.00001
if self.light_version: if self.light_version:
marginal_mask = (text_regions_p[:,:]==pixel_img)*1 marginal_mask = (text_regions_p[:,:]==label_marginalia)*1
marginal_mask = marginal_mask.astype('uint8') marginal_mask = marginal_mask.astype('uint8')
marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
else: else:
polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) polygons_of_marginals = return_contours_of_interested_region(text_regions_p, label_marginalia, min_area_mar)
pixel_img = 10 label_tables = 10
contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) contours_tables = return_contours_of_interested_region(text_regions_p, label_tables, min_area_mar)
#print(time.time()-t_0_box,'time box in 5') #print(time.time()-t_0_box,'time box in 5')
self.logger.debug('exit run_boxes_no_full_layout') self.logger.debug('exit run_boxes_no_full_layout')
return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, return (polygons_of_images, img_revised_tab, text_regions_p_d, textline_mask_tot_d,
regions_without_separators_d, boxes, boxes_d, regions_without_separators_d, boxes, boxes_d,
polygons_of_marginals, contours_tables) polygons_of_marginals, contours_tables)
@ -3276,24 +3254,13 @@ class Eynollah:
text_regions_p[:,:][table_prediction[:,:]==1] = 10 text_regions_p[:,:][table_prediction[:,:]==1] = 10
img_revised_tab = text_regions_p[:,:] img_revised_tab = text_regions_p[:,:]
if np.abs(slope_deskew) >= SLOPE_THRESHOLD: if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
_, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
table_prediction, slope_deskew) table_prediction_n = rotate_image(table_prediction, slope_deskew)
regions_without_separators_d = (text_regions_p_d[:,:] == 1)*1
text_regions_p_1_n = resize_image(text_regions_p_1_n,
text_regions_p.shape[0],
text_regions_p.shape[1])
textline_mask_tot_d = resize_image(textline_mask_tot_d,
text_regions_p.shape[0],
text_regions_p.shape[1])
table_prediction_n = resize_image(table_prediction_n,
text_regions_p.shape[0],
text_regions_p.shape[1])
regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
else: else:
text_regions_p_1_n = None text_regions_p_d = None
textline_mask_tot_d = None textline_mask_tot_d = None
regions_without_separators_d = None regions_without_separators_d = None
# regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1 # regions_without_separators = ( text_regions_p[:,:]==1 | text_regions_p[:,:]==2 )*1
@ -3303,24 +3270,13 @@ class Eynollah:
else: else:
if np.abs(slope_deskew) >= SLOPE_THRESHOLD: if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
_, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = \ textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
table_prediction, slope_deskew) table_prediction_n = rotate_image(table_prediction, slope_deskew)
regions_without_separators_d = (text_regions_p_d[:,:] == 1)*1
text_regions_p_1_n = resize_image(text_regions_p_1_n,
text_regions_p.shape[0],
text_regions_p.shape[1])
textline_mask_tot_d = resize_image(textline_mask_tot_d,
text_regions_p.shape[0],
text_regions_p.shape[1])
table_prediction_n = resize_image(table_prediction_n,
text_regions_p.shape[0],
text_regions_p.shape[1])
regions_without_separators_d = (text_regions_p_1_n[:,:] == 1)*1
regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
else: else:
text_regions_p_1_n = None text_regions_p_d = None
textline_mask_tot_d = None textline_mask_tot_d = None
regions_without_separators_d = None regions_without_separators_d = None
@ -3329,14 +3285,14 @@ class Eynollah:
regions_without_separators = (text_regions_p[:,:] == 1)*1 regions_without_separators = (text_regions_p[:,:] == 1)*1
regions_without_separators[table_prediction == 1] = 1 regions_without_separators[table_prediction == 1] = 1
pixel_lines=3 label_seps=3
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( num_col, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
text_regions_p, num_col_classifier, self.tables, pixel_lines) text_regions_p, num_col_classifier, self.tables, label_seps)
if np.abs(slope_deskew) >= SLOPE_THRESHOLD: if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( num_col_d, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
text_regions_p_1_n, num_col_classifier, self.tables, pixel_lines) text_regions_p_d, num_col_classifier, self.tables, label_seps)
if num_col_classifier>=3: if num_col_classifier>=3:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
@ -3351,30 +3307,30 @@ class Eynollah:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new, regions_without_separators, matrix_of_lines_ch, splitter_y_new, regions_without_separators, text_regions_p, matrix_of_seps_ch,
num_col_classifier, erosion_hurts, self.tables, self.right2left) num_col_classifier, erosion_hurts, self.tables, self.right2left)
text_regions_p_tables = np.copy(text_regions_p) text_regions_p_tables = np.copy(text_regions_p)
text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10
pixel_line = 3 label_seps = 3
img_revised_tab2 = self.add_tables_heuristic_to_layout( img_revised_tab2 = self.add_tables_heuristic_to_layout(
text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables,
num_col_classifier , 0.000005, pixel_line) num_col_classifier , 0.000005, label_seps)
img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(
img_revised_tab2, table_prediction, 10, num_col_classifier) img_revised_tab2, table_prediction, 10, num_col_classifier)
else: else:
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, splitter_y_new_d, regions_without_separators_d, text_regions_p_d, matrix_of_seps_ch_d,
num_col_classifier, erosion_hurts, self.tables, self.right2left) num_col_classifier, erosion_hurts, self.tables, self.right2left)
text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.copy(text_regions_p_d)
text_regions_p_tables = np.round(text_regions_p_tables) text_regions_p_tables = np.round(text_regions_p_tables)
text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10 text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10
pixel_line = 3 label_seps = 3
img_revised_tab2 = self.add_tables_heuristic_to_layout( img_revised_tab2 = self.add_tables_heuristic_to_layout(
text_regions_p_tables, boxes_d, 0, splitter_y_new_d, text_regions_p_tables, boxes_d, 0, splitter_y_new_d,
peaks_neg_tot_tables_d, text_regions_p_tables, peaks_neg_tot_tables_d, text_regions_p_tables,
num_col_classifier, 0.000005, pixel_line) num_col_classifier, 0.000005, label_seps)
img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables( img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(
img_revised_tab2, table_prediction_n, 10, num_col_classifier) img_revised_tab2, table_prediction_n, 10, num_col_classifier)
@ -3399,20 +3355,20 @@ class Eynollah:
text_regions_p[img_revised_tab == 10] = 10 text_regions_p[img_revised_tab == 10] = 10
#img_revised_tab[img_revised_tab2 == 10] = 10 #img_revised_tab[img_revised_tab2 == 10] = 10
pixel_img = 4 label_marginalia = 4
min_area_mar = 0.00001 min_area_mar = 0.00001
if self.light_version: if self.light_version:
marginal_mask = (text_regions_p[:,:]==pixel_img)*1 marginal_mask = (text_regions_p[:,:]==label_marginalia)*1
marginal_mask = marginal_mask.astype('uint8') marginal_mask = marginal_mask.astype('uint8')
marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar)
else: else:
polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) polygons_of_marginals = return_contours_of_interested_region(text_regions_p, label_marginalia, min_area_mar)
pixel_img = 10 label_tables = 10
contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) contours_tables = return_contours_of_interested_region(text_regions_p, label_tables, min_area_mar)
# set first model with second model # set first model with second model
text_regions_p[:, :][text_regions_p[:, :] == 2] = 5 text_regions_p[:, :][text_regions_p[:, :] == 2] = 5
@ -3465,16 +3421,13 @@ class Eynollah:
#plt.show() #plt.show()
####if not self.tables: ####if not self.tables:
if np.abs(slope_deskew) >= SLOPE_THRESHOLD: if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
_, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout( textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew) text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
regions_fully_n = rotate_image(regions_fully, slope_deskew)
text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1])
textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1])
regions_fully_n = resize_image(regions_fully_n, text_regions_p.shape[0], text_regions_p.shape[1])
if not self.tables: if not self.tables:
regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 regions_without_separators_d = (text_regions_p_d[:, :] == 1) * 1
else: else:
text_regions_p_1_n = None text_regions_p_d = None
textline_mask_tot_d = None textline_mask_tot_d = None
regions_without_separators_d = None regions_without_separators_d = None
if not self.tables: if not self.tables:
@ -3484,7 +3437,7 @@ class Eynollah:
self.logger.debug('exit run_boxes_full_layout') self.logger.debug('exit run_boxes_full_layout')
#print("full inside 3", time.time()- t_full0) #print("full inside 3", time.time()- t_full0)
return (polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, return (polygons_of_images, img_revised_tab, text_regions_p_d, textline_mask_tot_d,
regions_without_separators_d, regions_fully, regions_without_separators, regions_without_separators_d, regions_fully, regions_without_separators,
polygons_of_marginals, contours_tables) polygons_of_marginals, contours_tables)
@ -3632,7 +3585,7 @@ class Eynollah:
co_text_all = contours_only_text_parent co_text_all = contours_only_text_parent
if not len(co_text_all): if not len(co_text_all):
return [], [] return []
labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool) labels_con = np.zeros((int(y_len /6.), int(x_len/6.), len(co_text_all)), dtype=bool)
co_text_all = [(i/6).astype(int) for i in co_text_all] co_text_all = [(i/6).astype(int) for i in co_text_all]
@ -3715,11 +3668,9 @@ class Eynollah:
else: else:
org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]]) org_contours_indexes.extend([indexes_of_located_cont[region_with_curr_order]])
region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] return org_contours_indexes
return org_contours_indexes, region_ids
else: else:
region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))] return ordered
return ordered, region_ids
def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot):
width = np.shape(textline_image)[1] width = np.shape(textline_image)[1]
@ -4213,7 +4164,7 @@ class Eynollah:
image_page, page_coord, cont_page = \ image_page, page_coord, cont_page = \
self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
pcgts = self.writer.build_pagexml_no_full_layout( pcgts = self.writer.build_pagexml_no_full_layout(
[], page_coord, [], [], [], [], [], page_coord, [], [], [],
polygons_of_images, [], [], [], [], [], [], [], [], [], polygons_of_images, [], [], [], [], [], [], [], [], [],
cont_page, [], []) cont_page, [], [])
if self.plotter: if self.plotter:
@ -4254,7 +4205,6 @@ class Eynollah:
order_text_new = [0] order_text_new = [0]
slopes =[0] slopes =[0]
id_of_texts_tot =['region_0001']
conf_contours_textregions =[0] conf_contours_textregions =[0]
if self.ocr and not self.tr: if self.ocr and not self.tr:
@ -4266,7 +4216,7 @@ class Eynollah:
ocr_all_textlines = None ocr_all_textlines = None
pcgts = self.writer.build_pagexml_no_full_layout( pcgts = self.writer.build_pagexml_no_full_layout(
cont_page, page_coord, order_text_new, id_of_texts_tot, cont_page, page_coord, order_text_new,
all_found_textline_polygons, page_coord, [], all_found_textline_polygons, page_coord, [],
[], [], [], [], [], [], [], [], [], [], [], [],
slopes, [], [], slopes, [], [],
@ -4301,7 +4251,7 @@ class Eynollah:
slope_deskew = self.run_deskew(textline_mask_tot_ea) slope_deskew = self.run_deskew(textline_mask_tot_ea)
#print("text region early -2,5 in %.1fs", time.time() - t0) #print("text region early -2,5 in %.1fs", time.time() - t0)
#self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps, \
text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \
self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea,
num_col_classifier, num_column_is_classified, num_col_classifier, num_column_is_classified,
@ -4318,7 +4268,7 @@ class Eynollah:
confidence_matrix = np.zeros((text_regions_p_1.shape[:2])) confidence_matrix = np.zeros((text_regions_p_1.shape[:2]))
t1 = time.time() t1 = time.time()
num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \ num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_seps, \
text_regions_p_1, cont_page, table_prediction = \ text_regions_p_1, cont_page, table_prediction = \
self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified,
erosion_hurts) erosion_hurts)
@ -4332,7 +4282,7 @@ class Eynollah:
self.logger.info("No columns detected - generating empty PAGE-XML") self.logger.info("No columns detected - generating empty PAGE-XML")
pcgts = self.writer.build_pagexml_no_full_layout( pcgts = self.writer.build_pagexml_no_full_layout(
[], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [],
cont_page, [], []) cont_page, [], [])
return pcgts return pcgts
@ -4356,12 +4306,12 @@ class Eynollah:
image_page = resize_image(image_page,img_h_new, img_w_new ) image_page = resize_image(image_page,img_h_new, img_w_new )
textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new )
mask_images = resize_image(mask_images,img_h_new, img_w_new ) mask_images = resize_image(mask_images,img_h_new, img_w_new )
mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) mask_seps = resize_image(mask_seps, img_h_new, img_w_new)
text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new )
table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) table_prediction = resize_image(table_prediction,img_h_new, img_w_new )
textline_mask_tot, text_regions_p = \ textline_mask_tot, text_regions_p = \
self.run_marginals(textline_mask_tot_ea, mask_images, mask_lines, self.run_marginals(textline_mask_tot_ea, mask_images, mask_seps,
num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
if self.plotter: if self.plotter:
self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page) self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page)
@ -4398,14 +4348,14 @@ class Eynollah:
## birdan sora chock chakir ## birdan sora chock chakir
t1 = time.time() t1 = time.time()
if not self.full_layout: if not self.full_layout:
polygons_of_images, img_revised_tab, text_regions_p_1_n, \ polygons_of_images, img_revised_tab, text_regions_p_d, \
textline_mask_tot_d, regions_without_separators_d, \ textline_mask_tot_d, regions_without_separators_d, \
boxes, boxes_d, polygons_of_marginals, contours_tables = \ boxes, boxes_d, polygons_of_marginals, contours_tables = \
self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
num_col_classifier, table_prediction, erosion_hurts) num_col_classifier, table_prediction, erosion_hurts)
###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals) ###polygons_of_marginals = dilate_textregion_contours(polygons_of_marginals)
else: else:
polygons_of_images, img_revised_tab, text_regions_p_1_n, \ polygons_of_images, img_revised_tab, text_regions_p_d, \
textline_mask_tot_d, regions_without_separators_d, \ textline_mask_tot_d, regions_without_separators_d, \
regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \ regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = \
self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew,
@ -4419,7 +4369,7 @@ class Eynollah:
text_only = (img_revised_tab[:, :] == 1) * 1 text_only = (img_revised_tab[:, :] == 1) * 1
if np.abs(slope_deskew) >= SLOPE_THRESHOLD: if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
text_only_d = (text_regions_p_1_n[:, :] == 1) * 1 text_only_d = ((text_regions_p_d[:, :] == 1)) * 1
#print("text region early 2 in %.1fs", time.time() - t0) #print("text region early 2 in %.1fs", time.time() - t0)
###min_con_area = 0.000005 ###min_con_area = 0.000005
@ -4493,7 +4443,7 @@ class Eynollah:
dists[i] = np.linalg.norm(centers[:, i:i + 1] - centers_d, axis=0) dists[i] = np.linalg.norm(centers[:, i:i + 1] - centers_d, axis=0)
corresp = np.zeros(dists.shape, dtype=bool) corresp = np.zeros(dists.shape, dtype=bool)
# keep searching next-closest until at least one correspondence on each side # keep searching next-closest until at least one correspondence on each side
while not np.all(corresp.sum(axis=1)) and not np.all(corresp.sum(axis=0)): while not np.all(corresp.sum(axis=1)) or not np.all(corresp.sum(axis=0)):
idx = np.nanargmin(dists) idx = np.nanargmin(dists)
i, j = np.unravel_index(idx, dists.shape) i, j = np.unravel_index(idx, dists.shape)
dists[i, j] = np.nan dists[i, j] = np.nan
@ -4504,7 +4454,7 @@ class Eynollah:
# img1 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) # img1 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
# for i in range(len(contours_only_text_parent)): # for i in range(len(contours_only_text_parent)):
# cv2.fillPoly(img1, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1) # cv2.fillPoly(img1, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1)
# plt.subplot(2, 2, 1, title="direct corresp contours") # plt.subplot(1, 4, 1, title="direct corresp contours")
# plt.imshow(img1) # plt.imshow(img1)
# img2 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) # img2 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
# join deskewed regions mapping to single original ones # join deskewed regions mapping to single original ones
@ -4517,13 +4467,13 @@ class Eynollah:
contour_d = polygon2contour(join_polygons(polygons_d)) contour_d = polygon2contour(join_polygons(polygons_d))
contours_only_text_parent_d_ordered[i] = contour_d contours_only_text_parent_d_ordered[i] = contour_d
# cv2.fillPoly(img2, pts=[contour_d], color=i + 1) # cv2.fillPoly(img2, pts=[contour_d], color=i + 1)
# plt.subplot(2, 2, 3, title="joined contours") # plt.subplot(1, 4, 2, title="joined contours")
# plt.imshow(img2) # plt.imshow(img2)
# img3 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) # img3 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
# split deskewed regions mapping to multiple original ones # split deskewed regions mapping to multiple original ones
def deskew(polygon): def deskew(polygon):
polygon = shapely.affinity.rotate(polygon, -slope_deskew, origin=center) polygon = shapely.affinity.rotate(polygon, -slope_deskew, origin=center)
polygon = shapely.affinity.translate(polygon, *offset.squeeze()) #polygon = shapely.affinity.translate(polygon, *offset.squeeze())
return polygon return polygon
for j in range(len(contours_only_text_parent_d)): for j in range(len(contours_only_text_parent_d)):
if np.count_nonzero(corresp[:, j]) > 1: if np.count_nonzero(corresp[:, j]) > 1:
@ -4541,21 +4491,45 @@ class Eynollah:
if polygon_d] if polygon_d]
contours_only_text_parent_d_ordered[indices] = contours_d contours_only_text_parent_d_ordered[indices] = contours_d
# cv2.fillPoly(img3, pts=contours_d, color=j + 1) # cv2.fillPoly(img3, pts=contours_d, color=j + 1)
# plt.subplot(2, 2, 4, title="split contours") # plt.subplot(1, 4, 3, title="split contours")
# plt.imshow(img3) # plt.imshow(img3)
# img4 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) # img4 = np.zeros(text_only_d.shape[:2], dtype=np.uint8)
# for i in range(len(contours_only_text_parent)): # for i in range(len(contours_only_text_parent)):
# cv2.fillPoly(img4, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1) # cv2.fillPoly(img4, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1)
# plt.subplot(2, 2, 2, title="result contours") # plt.subplot(1, 4, 4, title="result contours")
# plt.imshow(img4) # plt.imshow(img4)
# plt.show() # plt.show()
# from matplotlib import patches as ptchs
# plt.subplot(1, 2, 1, title="undeskewed")
# plt.imshow(text_only)
# centers = np.stack(find_center_of_contours(contours_only_text_parent)) # [2, N]
# for i in range(len(contours_only_text_parent)):
# cnt = contours_only_text_parent[i]
# ctr = centers[:, i]
# plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='blue'))
# plt.gca().scatter(ctr[0], ctr[1], 20, c='blue', marker='x')
# plt.gca().text(ctr[0], ctr[1], str(i), c='blue')
# plt.subplot(1, 2, 2, title="deskewed")
# plt.imshow(text_only_d)
# centers_d = np.stack(find_center_of_contours(contours_only_text_parent_d_ordered)) # [2, N]
# for i in range(len(contours_only_text_parent)):
# cnt = contours_only_text_parent[i]
# cnt = polygon2contour(deskew(contour2polygon(cnt)))
# plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='blue'))
# for i in range(len(contours_only_text_parent_d_ordered)):
# cnt = contours_only_text_parent_d_ordered[i]
# ctr = centers_d[:, i]
# plt.gca().add_patch(ptchs.Polygon(cnt[:, 0], closed=False, fill=False, color='red'))
# plt.gca().scatter(ctr[0], ctr[1], 20, c='red', marker='x')
# plt.gca().text(ctr[0], ctr[1], str(i), c='red')
# plt.show()
if not len(contours_only_text_parent): if not len(contours_only_text_parent):
# stop early # stop early
empty_marginals = [[]] * len(polygons_of_marginals) empty_marginals = [[]] * len(polygons_of_marginals)
if self.full_layout: if self.full_layout:
pcgts = self.writer.build_pagexml_full_layout( pcgts = self.writer.build_pagexml_full_layout(
[], [], page_coord, [], [], [], [], [], [], [], [], page_coord, [], [], [], [], [],
polygons_of_images, contours_tables, [], polygons_of_images, contours_tables, [],
polygons_of_marginals, polygons_of_marginals, polygons_of_marginals, polygons_of_marginals,
empty_marginals, empty_marginals, empty_marginals, empty_marginals,
@ -4564,7 +4538,7 @@ class Eynollah:
cont_page, polygons_seplines) cont_page, polygons_seplines)
else: else:
pcgts = self.writer.build_pagexml_no_full_layout( pcgts = self.writer.build_pagexml_no_full_layout(
[], page_coord, [], [], [], [], [], page_coord, [], [], [],
polygons_of_images, polygons_of_images,
polygons_of_marginals, polygons_of_marginals, polygons_of_marginals, polygons_of_marginals,
empty_marginals, empty_marginals, empty_marginals, empty_marginals,
@ -4695,18 +4669,18 @@ class Eynollah:
label_seps = 6 label_seps = 6
if not self.headers_off: if not self.headers_off:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( num_col, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
text_regions_p, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h) text_regions_p, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h)
else: else:
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( _, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
text_regions_p_1_n, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered) text_regions_p_d, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered)
elif self.headers_off: elif self.headers_off:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( num_col, _, matrix_of_seps_ch, splitter_y_new, _ = find_number_of_columns_in_document(
text_regions_p, num_col_classifier, self.tables, label_seps) text_regions_p, num_col_classifier, self.tables, label_seps)
else: else:
_, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( _, _, matrix_of_seps_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(
text_regions_p_1_n, num_col_classifier, self.tables, label_seps) text_regions_p_d, num_col_classifier, self.tables, label_seps)
if num_col_classifier >= 3: if num_col_classifier >= 3:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
@ -4717,13 +4691,13 @@ class Eynollah:
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new( boxes, _ = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new, regions_without_separators, matrix_of_lines_ch, splitter_y_new, regions_without_separators, text_regions_p, matrix_of_seps_ch,
num_col_classifier, erosion_hurts, self.tables, self.right2left, num_col_classifier, erosion_hurts, self.tables, self.right2left,
logger=self.logger) logger=self.logger)
else: else:
boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new( boxes_d, _ = return_boxes_of_images_by_order_of_reading_new(
splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, splitter_y_new_d, regions_without_separators_d, text_regions_p_d, matrix_of_seps_ch_d,
num_col_classifier, erosion_hurts, self.tables, self.right2left, num_col_classifier, erosion_hurts, self.tables, self.right2left,
logger=self.logger) logger=self.logger)
else: else:
@ -4744,14 +4718,14 @@ class Eynollah:
self.logger.info("Headers ignored in reading order") self.logger.info("Headers ignored in reading order")
if self.reading_order_machine_based: if self.reading_order_machine_based:
order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( order_text_new = self.do_order_of_regions_with_model(
contours_only_text_parent, contours_only_text_parent_h, text_regions_p) contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
else: else:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
order_text_new, id_of_texts_tot = self.do_order_of_regions( order_text_new = self.do_order_of_regions(
contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
else: else:
order_text_new, id_of_texts_tot = self.do_order_of_regions( order_text_new = self.do_order_of_regions(
contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered,
boxes_d, textline_mask_tot_d) boxes_d, textline_mask_tot_d)
self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s") self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
@ -4848,7 +4822,7 @@ class Eynollah:
if self.full_layout: if self.full_layout:
pcgts = self.writer.build_pagexml_full_layout( pcgts = self.writer.build_pagexml_full_layout(
contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new,
all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_images, contours_tables, polygons_of_drop_capitals,
polygons_of_marginals_left, polygons_of_marginals_right, polygons_of_marginals_left, polygons_of_marginals_right,
@ -4861,7 +4835,7 @@ class Eynollah:
conf_contours_textregions, conf_contours_textregions_h) conf_contours_textregions, conf_contours_textregions_h)
else: else:
pcgts = self.writer.build_pagexml_no_full_layout( pcgts = self.writer.build_pagexml_no_full_layout(
contours_only_text_parent, page_coord, order_text_new, id_of_texts_tot, contours_only_text_parent, page_coord, order_text_new,
all_found_textline_polygons, all_box_coord, polygons_of_images, all_found_textline_polygons, all_box_coord, polygons_of_images,
polygons_of_marginals_left, polygons_of_marginals_right, polygons_of_marginals_left, polygons_of_marginals_right,
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,

View file

@ -70,7 +70,7 @@ class SbbBinarizeProcessor(Processor):
if oplevel == 'page': if oplevel == 'page':
self.logger.info("Binarizing on 'page' level in page '%s'", page_id) self.logger.info("Binarizing on 'page' level in page '%s'", page_id)
page_image_bin = cv2pil(self.binarizer.run(image=pil2cv(page_image), use_patches=True)) page_image_bin = cv2pil(self.binarizer.run_single(image=pil2cv(page_image), use_patches=True))
# update PAGE (reference the image file): # update PAGE (reference the image file):
page_image_ref = AlternativeImageType(comments=page_xywh['features'] + ',binarized,clipped') page_image_ref = AlternativeImageType(comments=page_xywh['features'] + ',binarized,clipped')
page.add_AlternativeImage(page_image_ref) page.add_AlternativeImage(page_image_ref)
@ -83,7 +83,7 @@ class SbbBinarizeProcessor(Processor):
for region in regions: for region in regions:
region_image, region_xywh = self.workspace.image_from_segment( region_image, region_xywh = self.workspace.image_from_segment(
region, page_image, page_xywh, feature_filter='binarized') region, page_image, page_xywh, feature_filter='binarized')
region_image_bin = cv2pil(self.binarizer.run(image=pil2cv(region_image), use_patches=True)) region_image_bin = cv2pil(self.binarizer.run_single(image=pil2cv(region_image), use_patches=True))
# update PAGE (reference the image file): # update PAGE (reference the image file):
region_image_ref = AlternativeImageType(comments=region_xywh['features'] + ',binarized') region_image_ref = AlternativeImageType(comments=region_xywh['features'] + ',binarized')
region.add_AlternativeImage(region_image_ref) region.add_AlternativeImage(region_image_ref)
@ -95,7 +95,7 @@ class SbbBinarizeProcessor(Processor):
self.logger.warning("Page '%s' contains no text lines", page_id) self.logger.warning("Page '%s' contains no text lines", page_id)
for line in lines: for line in lines:
line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized') line_image, line_xywh = self.workspace.image_from_segment(line, page_image, page_xywh, feature_filter='binarized')
line_image_bin = cv2pil(self.binarizer.run(image=pil2cv(line_image), use_patches=True)) line_image_bin = cv2pil(self.binarizer.run_single(image=pil2cv(line_image), use_patches=True))
# update PAGE (reference the image file): # update PAGE (reference the image file):
line_image_ref = AlternativeImageType(comments=line_xywh['features'] + ',binarized') line_image_ref = AlternativeImageType(comments=line_xywh['features'] + ',binarized')
line.add_AlternativeImage(region_image_ref) line.add_AlternativeImage(region_image_ref)

View file

@ -25,7 +25,7 @@ class SbbBinarizer:
def __init__(self, model_dir, logger=None): def __init__(self, model_dir, logger=None):
self.model_dir = model_dir self.model_dir = model_dir
self.log = logger if logger else logging.getLogger('SbbBinarizer') self.logger = logger if logger else logging.getLogger('SbbBinarizer')
self.start_new_session() self.start_new_session()
@ -315,47 +315,30 @@ class SbbBinarizer:
prediction_true = prediction_true.astype(np.uint8) prediction_true = prediction_true.astype(np.uint8)
return prediction_true[:,:,0] return prediction_true[:,:,0]
def run(self, image_path=None, output=None, dir_in=None, use_patches=False, overwrite=False):
    """
    Binarize one image file, or every image file of a directory, and write the result(s).

    Args:
        image_path: path of the single input image (used when ``dir_in`` is not set).
        output: output file path in single-image mode; output directory when
            ``dir_in`` is set (each result is then named ``<input stem>.png``).
        dir_in: directory whose entries accepted by ``is_image_filename``
            are all processed.
        use_patches: passed through to ``run_single``.
        overwrite: if true, existing output files are replaced; otherwise
            inputs whose output file already exists are skipped.
    """
    if dir_in:
        ls_imgs = [(os.path.join(dir_in, image_filename),
                    os.path.join(output, os.path.splitext(image_filename)[0] + '.png'))
                   for image_filename in filter(is_image_filename,
                                                os.listdir(dir_in))]
    else:
        ls_imgs = [(image_path, output)]

    for input_path, output_path in ls_imgs:
        if os.path.exists(output_path):
            if not overwrite:
                self.logger.warning("will skip input for existing output file '%s'", output_path)
                # actually skip: without this, the existing file would be clobbered
                # regardless of the overwrite flag
                continue
            self.logger.warning("will overwrite existing output file '%s'", output_path)
        self.logger.info("binarizing input file '%s'", input_path)
        image = cv2.imread(input_path)
        result = self.run_single(image, use_patches)
        cv2.imwrite(output_path, result)
def run_single(self, image: np.ndarray, use_patches=False):
img_last = 0 img_last = 0
for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): for n, (model, model_file) in enumerate(zip(self.models, self.model_files)):
self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) self.logger.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files)))
res = self.predict(model, image, use_patches) res = self.predict(model, image, use_patches)
@ -374,5 +357,4 @@ class SbbBinarizer:
kernel = np.ones((5, 5), np.uint8) kernel = np.ones((5, 5), np.uint8)
img_last[:, :][img_last[:, :] > 0] = 255 img_last[:, :][img_last[:, :] > 0] = 255
img_last = (img_last[:, :] == 0) * 255 img_last = (img_last[:, :] == 0) * 255
return img_last
cv2.imwrite(os.path.join(output, image_stem + '.png'), img_last)

File diff suppressed because it is too large Load diff

View file

@ -14,21 +14,16 @@ from shapely.ops import unary_union, nearest_points
from .rotate import rotate_image, rotation_image_new from .rotate import rotate_image, rotation_image_new
def contours_in_same_horizon(cy_main_hor):
    """
    Find pairs of y coordinates which are close to each other.

    The coordinates are sorted, and every two neighbours (in sorted order)
    whose distance is at most 20 are reported as a pair.

    Args:
        cy_main_hor: 1-d array-like of y coordinates.

    Returns:
        Integer array of shape (K, 2): each row holds the two indices
        (into ``cy_main_hor``) of one close pair, the one with the
        smaller (or equal) y coordinate first. K is 0 if no pair is found.
    """
    cy_main_hor = np.asarray(cy_main_hor)
    sort = np.argsort(cy_main_hor)
    # Threshold the *distances* between sorted neighbours. Thresholding the
    # coordinates first and differencing the boolean mask would only flag
    # where the values cross y == 20.
    same = np.flatnonzero(np.diff(cy_main_hor[sort]) <= 20)
    return np.stack((sort[:-1][same], sort[1:][same])).T
def find_contours_mean_y_diff(contours_main): def find_contours_mean_y_diff(contours_main):
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]

View file

@ -89,7 +89,7 @@ class EynollahXmlWriter:
def build_pagexml_no_full_layout( def build_pagexml_no_full_layout(
self, found_polygons_text_region, self, found_polygons_text_region,
page_coord, order_of_texts, id_of_texts, page_coord, order_of_texts,
all_found_textline_polygons, all_found_textline_polygons,
all_box_coord, all_box_coord,
found_polygons_text_region_img, found_polygons_text_region_img,
@ -102,7 +102,7 @@ class EynollahXmlWriter:
**kwargs): **kwargs):
return self.build_pagexml_full_layout( return self.build_pagexml_full_layout(
found_polygons_text_region, [], found_polygons_text_region, [],
page_coord, order_of_texts, id_of_texts, page_coord, order_of_texts,
all_found_textline_polygons, [], all_found_textline_polygons, [],
all_box_coord, [], all_box_coord, [],
found_polygons_text_region_img, found_polygons_tables, [], found_polygons_text_region_img, found_polygons_tables, [],
@ -116,7 +116,7 @@ class EynollahXmlWriter:
def build_pagexml_full_layout( def build_pagexml_full_layout(
self, self,
found_polygons_text_region, found_polygons_text_region_h, found_polygons_text_region, found_polygons_text_region_h,
page_coord, order_of_texts, id_of_texts, page_coord, order_of_texts,
all_found_textline_polygons, all_found_textline_polygons_h, all_found_textline_polygons, all_found_textline_polygons_h,
all_box_coord, all_box_coord_h, all_box_coord, all_box_coord_h,
found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals,