mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-03-24 08:02:45 +01:00
do_prediction*: avoid 3-channel results, simplify further…
- `do_prediction/_new_concept`: avoid unnecessary `np.repeat` on results, aggregate intermediate artificial class mask and confidence data in extra arrays
- callers: avoid unnecessarily thresholding the result arrays
- callers: adapt (no need to slice into channels)
- simplify by refactoring thresholding and skeletonization into function `seg_mask_label`
- `extract_text_regions*`: drop unused second result array
- `textline_contours`: avoid calculating unused unpatched prediction
This commit is contained in:
parent
ff7dc31a68
commit
3370a3aa85
3 changed files with 173 additions and 182 deletions
|
|
@ -35,7 +35,6 @@ import numpy as np
|
|||
import shapely.affinity
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
from skimage.morphology import skeletonize
|
||||
from ocrd_utils import tf_disable_interactive_logs
|
||||
import statistics
|
||||
|
||||
|
|
@ -86,6 +85,7 @@ from .utils import (
|
|||
box2rect,
|
||||
find_num_col,
|
||||
otsu_copy_binary,
|
||||
seg_mask_label,
|
||||
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
|
||||
check_any_text_region_in_model_one_is_main_or_header_light,
|
||||
small_textlines_to_parent_adherence2,
|
||||
|
|
@ -474,7 +474,7 @@ class Eynollah:
|
|||
if self.input_binary:
|
||||
img = self.imread()
|
||||
prediction_bin = self.do_prediction(True, img, self.model_zoo.get("binarization"), n_batch_inference=5)
|
||||
prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
|
||||
prediction_bin = 255 * (prediction_bin == 0)
|
||||
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
|
||||
img= np.copy(prediction_bin)
|
||||
img_bin = prediction_bin
|
||||
|
|
@ -623,7 +623,8 @@ class Eynollah:
|
|||
|
||||
def do_prediction(
|
||||
self, patches, img, model,
|
||||
n_batch_inference=1, marginal_of_patch_percent=0.1,
|
||||
n_batch_inference=1,
|
||||
marginal_of_patch_percent=0.1,
|
||||
thresholding_for_some_classes=False,
|
||||
thresholding_for_heading=False,
|
||||
thresholding_for_artificial_class=False,
|
||||
|
|
@ -638,26 +639,24 @@ class Eynollah:
|
|||
if not patches:
|
||||
img_h_page = img.shape[0]
|
||||
img_w_page = img.shape[1]
|
||||
img = img / float(255.0)
|
||||
img = img / 255.0
|
||||
img = resize_image(img, img_height_model, img_width_model)
|
||||
|
||||
label_p_pred = model.predict(img[np.newaxis], verbose=0)[0]
|
||||
seg = np.argmax(label_p_pred, axis=2)
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
seg_art = label_p_pred[:, :, artificial_class]
|
||||
seg_art = (seg_art >= threshold_art_class).astype(int)
|
||||
|
||||
seg[skeletonize(seg_art)] = artificial_class
|
||||
seg_mask_label(
|
||||
seg, label_p_pred[:, :, artificial_class] >= threshold_art_class,
|
||||
label=artificial_class,
|
||||
skeletonize=True)
|
||||
|
||||
if thresholding_for_heading:
|
||||
seg_header = label_p_pred[:, :, 2]
|
||||
seg_mask_label(
|
||||
seg, label_p_pred[:, :, 2] >= 0.2,
|
||||
label=2)
|
||||
|
||||
seg[seg_header >= 0.2] = 2
|
||||
|
||||
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
|
||||
prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
|
||||
return prediction_true
|
||||
return resize_image(seg, img_h_page, img_w_page).astype(np.uint8)
|
||||
|
||||
if img.shape[0] < img_height_model:
|
||||
img = resize_image(img, img_height_model, img.shape[1])
|
||||
|
|
@ -672,8 +671,9 @@ class Eynollah:
|
|||
#img = img.astype(np.float16)
|
||||
img_h = img.shape[0]
|
||||
img_w = img.shape[1]
|
||||
prediction_true = np.zeros((img_h, img_w, 3))
|
||||
mask_true = np.zeros((img_h, img_w))
|
||||
prediction = np.zeros((img_h, img_w), dtype=np.uint8)
|
||||
if thresholding_for_artificial_class:
|
||||
mask_artificial_class = np.zeros((img_h, img_w), dtype=bool)
|
||||
nxf = math.ceil(img_w / float(width_mid))
|
||||
nyf = math.ceil(img_h / float(height_mid))
|
||||
|
||||
|
|
@ -684,212 +684,6 @@ class Eynollah:
|
|||
list_y_u = []
|
||||
list_y_d = []
|
||||
|
||||
batch_indexer = 0
|
||||
img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
|
||||
for i in range(nxf):
|
||||
for j in range(nyf):
|
||||
index_x_d = i * width_mid
|
||||
index_x_u = index_x_d + img_width_model
|
||||
index_y_d = j * height_mid
|
||||
index_y_u = index_y_d + img_height_model
|
||||
if index_x_u > img_w:
|
||||
index_x_u = img_w
|
||||
index_x_d = img_w - img_width_model
|
||||
if index_y_u > img_h:
|
||||
index_y_u = img_h
|
||||
index_y_d = img_h - img_height_model
|
||||
|
||||
list_i_s.append(i)
|
||||
list_j_s.append(j)
|
||||
list_x_u.append(index_x_u)
|
||||
list_x_d.append(index_x_d)
|
||||
list_y_d.append(index_y_d)
|
||||
list_y_u.append(index_y_u)
|
||||
|
||||
img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :]
|
||||
batch_indexer += 1
|
||||
|
||||
if (batch_indexer == n_batch_inference or
|
||||
# last batch
|
||||
i == nxf - 1 and j == nyf - 1):
|
||||
self.logger.debug("predicting patches on %s", str(img_patch.shape))
|
||||
label_p_pred = model.predict(img_patch, verbose=0)
|
||||
seg = np.argmax(label_p_pred, axis=3)
|
||||
|
||||
if thresholding_for_some_classes:
|
||||
seg_not_base = label_p_pred[:,:,:,4]
|
||||
seg_not_base = (seg_not_base > 0.03).astype(int)
|
||||
|
||||
seg_line = label_p_pred[:,:,:,3]
|
||||
seg_line = (seg_line > 0.1).astype(int)
|
||||
|
||||
seg_background = label_p_pred[:,:,:,0]
|
||||
seg_background = (seg_background > 0.25).astype(int)
|
||||
|
||||
seg[seg_not_base==1]=4
|
||||
seg[seg_background==1]=0
|
||||
seg[(seg_line==1) & (seg==0)]=3
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
seg_art = label_p_pred[:, :, :, artificial_class]
|
||||
seg_art = (seg_art >= threshold_art_class).astype(int)
|
||||
|
||||
##seg[seg_art==1]=artificial_class
|
||||
|
||||
indexer_inside_batch = 0
|
||||
for i_batch, j_batch in zip(list_i_s, list_j_s):
|
||||
seg_in = seg[indexer_inside_batch]
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
seg_in_art = seg_art[indexer_inside_batch]
|
||||
|
||||
index_y_u_in = list_y_u[indexer_inside_batch]
|
||||
index_y_d_in = list_y_d[indexer_inside_batch]
|
||||
|
||||
index_x_u_in = list_x_u[indexer_inside_batch]
|
||||
index_x_d_in = list_x_d[indexer_inside_batch]
|
||||
|
||||
where = np.index_exp[index_y_d_in:index_y_u_in,
|
||||
index_x_d_in:index_x_u_in]
|
||||
if (i_batch == 0 and
|
||||
j_batch == 0):
|
||||
inbox = np.index_exp[0:-margin or None,
|
||||
0:-margin or None]
|
||||
elif (i_batch == nxf - 1 and
|
||||
j_batch == nyf - 1):
|
||||
inbox = np.index_exp[margin:,
|
||||
margin:]
|
||||
elif (i_batch == 0 and
|
||||
j_batch == nyf - 1):
|
||||
inbox = np.index_exp[margin:,
|
||||
0:-margin or None]
|
||||
elif (i_batch == nxf - 1 and
|
||||
j_batch == 0):
|
||||
inbox = np.index_exp[0:-margin or None,
|
||||
margin:]
|
||||
elif (i_batch == 0 and
|
||||
j_batch != 0 and
|
||||
j_batch != nyf - 1):
|
||||
inbox = np.index_exp[margin:-margin or None,
|
||||
0:-margin or None]
|
||||
elif (i_batch == nxf - 1 and
|
||||
j_batch != 0 and
|
||||
j_batch != nyf - 1):
|
||||
inbox = np.index_exp[margin:-margin or None,
|
||||
margin:]
|
||||
elif (i_batch != 0 and
|
||||
i_batch != nxf - 1 and
|
||||
j_batch == 0):
|
||||
inbox = np.index_exp[0:-margin or None,
|
||||
margin:-margin or None]
|
||||
elif (i_batch != 0 and
|
||||
i_batch != nxf - 1 and
|
||||
j_batch == nyf - 1):
|
||||
inbox = np.index_exp[margin:,
|
||||
margin:-margin or None]
|
||||
else:
|
||||
inbox = np.index_exp[margin:-margin or None,
|
||||
margin:-margin or None]
|
||||
prediction_true[where][inbox] = seg_in[inbox + (np.newaxis,)]
|
||||
if thresholding_for_artificial_class:
|
||||
prediction_true[where][inbox + (1,)] = seg_in_art[inbox]
|
||||
|
||||
indexer_inside_batch += 1
|
||||
|
||||
|
||||
list_i_s = []
|
||||
list_j_s = []
|
||||
list_x_u = []
|
||||
list_x_d = []
|
||||
list_y_u = []
|
||||
list_y_d = []
|
||||
|
||||
batch_indexer = 0
|
||||
img_patch[:] = 0
|
||||
|
||||
prediction_true = prediction_true.astype(np.uint8)
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
kernel_min = np.ones((3, 3), np.uint8)
|
||||
prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0
|
||||
|
||||
skeleton_art = skeletonize(prediction_true[:,:,1]).astype(np.uint8)
|
||||
skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1)
|
||||
|
||||
prediction_true[:,:,0][skeleton_art==1]=artificial_class
|
||||
#del model
|
||||
gc.collect()
|
||||
return prediction_true
|
||||
|
||||
def do_prediction_new_concept(
|
||||
self, patches, img, model,
|
||||
n_batch_inference=1, marginal_of_patch_percent=0.1,
|
||||
thresholding_for_artificial_class=False,
|
||||
threshold_art_class=0.1,
|
||||
artificial_class=4,
|
||||
):
|
||||
|
||||
self.logger.debug("enter do_prediction_new_concept")
|
||||
img_height_model = model.layers[-1].output_shape[1]
|
||||
img_width_model = model.layers[-1].output_shape[2]
|
||||
|
||||
if not patches:
|
||||
img_h_page = img.shape[0]
|
||||
img_w_page = img.shape[1]
|
||||
img = img / 255.0
|
||||
img = resize_image(img, img_height_model, img_width_model)
|
||||
|
||||
label_p_pred = model.predict(img[np.newaxis], verbose=0)[0]
|
||||
seg = np.argmax(label_p_pred, axis=2)
|
||||
|
||||
seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2)
|
||||
prediction_true = resize_image(seg_color, img_h_page, img_w_page).astype(np.uint8)
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
kernel_min = np.ones((3, 3), np.uint8)
|
||||
seg_art = label_p_pred[:, :, artificial_class]
|
||||
seg_art = (seg_art >= threshold_art_class).astype(int)
|
||||
#seg[seg_art==1]=4
|
||||
seg_art = resize_image(seg_art, img_h_page, img_w_page).astype(np.uint8)
|
||||
|
||||
prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0
|
||||
|
||||
skeleton_art = skeletonize(seg_art).astype(np.uint8)
|
||||
skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1)
|
||||
|
||||
prediction_true[:,:,0][skeleton_art==1] = artificial_class
|
||||
|
||||
seg_text = resize_image(label_p_pred[:, :, 1] , img_h_page, img_w_page)
|
||||
return prediction_true, seg_text
|
||||
|
||||
if img.shape[0] < img_height_model:
|
||||
img = resize_image(img, img_height_model, img.shape[1])
|
||||
if img.shape[1] < img_width_model:
|
||||
img = resize_image(img, img.shape[0], img_width_model)
|
||||
|
||||
self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
|
||||
margin = int(marginal_of_patch_percent * img_height_model)
|
||||
width_mid = img_width_model - 2 * margin
|
||||
height_mid = img_height_model - 2 * margin
|
||||
img = img / 255.0
|
||||
img = img.astype(np.float16)
|
||||
img_h = img.shape[0]
|
||||
img_w = img.shape[1]
|
||||
prediction_true = np.zeros((img_h, img_w, 3))
|
||||
confidence_matrix = np.zeros((img_h, img_w))
|
||||
mask_true = np.zeros((img_h, img_w))
|
||||
nxf = img_w / float(width_mid)
|
||||
nyf = img_h / float(height_mid)
|
||||
nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf)
|
||||
nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf)
|
||||
|
||||
list_i_s = []
|
||||
list_j_s = []
|
||||
list_x_u = []
|
||||
list_x_d = []
|
||||
list_y_u = []
|
||||
list_y_d = []
|
||||
|
||||
batch_indexer = 0
|
||||
img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
|
||||
for i in range(nxf):
|
||||
|
|
@ -923,9 +717,19 @@ class Eynollah:
|
|||
label_p_pred = model.predict(img_patch, verbose=0)
|
||||
seg = np.argmax(label_p_pred, axis=3)
|
||||
|
||||
if thresholding_for_some_classes:
|
||||
seg_mask_label(
|
||||
seg, label_p_pred[:,:,:,4] > 0.03,
|
||||
label=4) #
|
||||
seg_mask_label(
|
||||
seg, label_p_pred[:,:,:,0] > 0.25,
|
||||
label=0) # bg
|
||||
# Promote pixels to the line label (3) only where the line score exceeds 0.10
# AND the pixel is currently background (0), matching the previous
# `seg[(seg_line==1) & (seg==0)] = 3` logic.
# Both comparisons must be parenthesized: `&` binds tighter than `>` / `==`,
# so the bare form evaluates `0.10 & seg` first and raises TypeError.
seg_mask_label(
    seg, (label_p_pred[:,:,:,3] > 0.10) & (seg == 0),
    label=3) # line
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
seg_art = label_p_pred[:, :, :, artificial_class]
|
||||
seg_art = (seg_art >= threshold_art_class).astype(int)
|
||||
seg_art = label_p_pred[:, :, :, artificial_class] >= threshold_art_class
|
||||
|
||||
indexer_inside_batch = 0
|
||||
for i_batch, j_batch in zip(list_i_s, list_j_s):
|
||||
|
|
@ -981,14 +785,193 @@ class Eynollah:
|
|||
else:
|
||||
inbox = np.index_exp[margin:-margin or None,
|
||||
margin:-margin or None]
|
||||
prediction_true[where][inbox] = seg_in[inbox + (np.newaxis,)]
|
||||
confidence_matrix[where][inbox] = label_p_pred[(0,) + inbox + (1,)]
|
||||
# rs: why is prediction_true 3ch when only 1st gets used?
|
||||
# artificial boundary class map should be extra array
|
||||
prediction[where][inbox] = seg_in[inbox]
|
||||
if thresholding_for_artificial_class:
|
||||
mask_artificial_class[where][inbox] = seg_in_art[inbox]
|
||||
|
||||
indexer_inside_batch += 1
|
||||
|
||||
|
||||
list_i_s = []
|
||||
list_j_s = []
|
||||
list_x_u = []
|
||||
list_x_d = []
|
||||
list_y_u = []
|
||||
list_y_d = []
|
||||
|
||||
batch_indexer = 0
|
||||
img_patch[:] = 0
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
seg_mask_label(prediction, mask_artificial_class,
|
||||
label=artificial_class,
|
||||
only=True,
|
||||
skeletonize=True,
|
||||
dilate=3)
|
||||
gc.collect()
|
||||
return prediction
|
||||
|
||||
def do_prediction_new_concept(
|
||||
self, patches, img, model,
|
||||
n_batch_inference=1,
|
||||
marginal_of_patch_percent=0.1,
|
||||
thresholding_for_artificial_class=False,
|
||||
threshold_art_class=0.1,
|
||||
artificial_class=4,
|
||||
):
|
||||
|
||||
self.logger.debug("enter do_prediction_new_concept (patches=%d)", patches)
|
||||
img_height_model = model.layers[-1].output_shape[1]
|
||||
img_width_model = model.layers[-1].output_shape[2]
|
||||
|
||||
img = img / 255.0
|
||||
img = img.astype(np.float16)
|
||||
|
||||
if not patches:
|
||||
img_h_page = img.shape[0]
|
||||
img_w_page = img.shape[1]
|
||||
img = resize_image(img, img_height_model, img_width_model)
|
||||
|
||||
label_p_pred = model.predict(img[np.newaxis], verbose=0)[0]
|
||||
seg = np.argmax(label_p_pred, axis=2)
|
||||
|
||||
prediction = resize_image(seg, img_h_page, img_w_page).astype(np.uint8)
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
mask = resize_image(label_p_pred[:, :, artificial_class],
|
||||
img_h_page, img_w_page) >= threshold_art_class
|
||||
seg_mask_label(prediction, mask,
|
||||
label=artificial_class,
|
||||
only=True,
|
||||
skeletonize=True,
|
||||
dilate=3)
|
||||
|
||||
conf_text = resize_image(label_p_pred[:, :, 1], img_h_page, img_w_page)
|
||||
return prediction, conf_text
|
||||
|
||||
if img.shape[0] < img_height_model:
|
||||
img = resize_image(img, img_height_model, img.shape[1])
|
||||
if img.shape[1] < img_width_model:
|
||||
img = resize_image(img, img.shape[0], img_width_model)
|
||||
|
||||
self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model)
|
||||
margin = int(marginal_of_patch_percent * img_height_model)
|
||||
width_mid = img_width_model - 2 * margin
|
||||
height_mid = img_height_model - 2 * margin
|
||||
img_h = img.shape[0]
|
||||
img_w = img.shape[1]
|
||||
prediction = np.zeros((img_h, img_w), dtype=np.uint8)
|
||||
confidence = np.zeros((img_h, img_w))
|
||||
if thresholding_for_artificial_class:
|
||||
mask_artificial_class = np.zeros((img_h, img_w), dtype=bool)
|
||||
nxf = math.ceil(img_w / float(width_mid))
|
||||
nyf = math.ceil(img_h / float(height_mid))
|
||||
|
||||
list_i_s = []
|
||||
list_j_s = []
|
||||
list_x_u = []
|
||||
list_x_d = []
|
||||
list_y_u = []
|
||||
list_y_d = []
|
||||
|
||||
batch_indexer = 0
|
||||
img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3))
|
||||
for i in range(nxf):
|
||||
for j in range(nyf):
|
||||
index_x_d = i * width_mid
|
||||
index_x_u = index_x_d + img_width_model
|
||||
index_y_d = j * height_mid
|
||||
index_y_u = index_y_d + img_height_model
|
||||
if index_x_u > img_w:
|
||||
index_x_u = img_w
|
||||
index_x_d = img_w - img_width_model
|
||||
if index_y_u > img_h:
|
||||
index_y_u = img_h
|
||||
index_y_d = img_h - img_height_model
|
||||
|
||||
list_i_s.append(i)
|
||||
list_j_s.append(j)
|
||||
list_x_u.append(index_x_u)
|
||||
list_x_d.append(index_x_d)
|
||||
list_y_d.append(index_y_d)
|
||||
list_y_u.append(index_y_u)
|
||||
|
||||
img_patch[batch_indexer] = img[index_y_d:index_y_u,
|
||||
index_x_d:index_x_u]
|
||||
batch_indexer += 1
|
||||
|
||||
if (batch_indexer == n_batch_inference or
|
||||
# last batch
|
||||
i == nxf - 1 and j == nyf - 1):
|
||||
self.logger.debug("predicting patches on %s", str(img_patch.shape))
|
||||
label_p_pred = model.predict(img_patch,verbose=0)
|
||||
seg = np.argmax(label_p_pred, axis=3)
|
||||
conf = label_p_pred[:, :, :, 1]
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
seg_art = label_p_pred[:, :, :, artificial_class] >= threshold_art_class
|
||||
|
||||
indexer_inside_batch = 0
|
||||
for i_batch, j_batch in zip(list_i_s, list_j_s):
|
||||
seg_in = seg[indexer_inside_batch]
|
||||
conf_in = conf[indexer_inside_batch]
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
seg_in_art = seg_art[indexer_inside_batch]
|
||||
|
||||
index_y_u_in = list_y_u[indexer_inside_batch]
|
||||
index_y_d_in = list_y_d[indexer_inside_batch]
|
||||
|
||||
index_x_u_in = list_x_u[indexer_inside_batch]
|
||||
index_x_d_in = list_x_d[indexer_inside_batch]
|
||||
|
||||
where = np.index_exp[index_y_d_in:index_y_u_in,
|
||||
index_x_d_in:index_x_u_in]
|
||||
if (i_batch == 0 and
|
||||
j_batch == 0):
|
||||
inbox = np.index_exp[0:-margin or None,
|
||||
0:-margin or None]
|
||||
elif (i_batch == nxf - 1 and
|
||||
j_batch == nyf - 1):
|
||||
inbox = np.index_exp[margin:,
|
||||
margin:]
|
||||
elif (i_batch == 0 and
|
||||
j_batch == nyf - 1):
|
||||
inbox = np.index_exp[margin:,
|
||||
0:-margin or None]
|
||||
elif (i_batch == nxf - 1 and
|
||||
j_batch == 0):
|
||||
inbox = np.index_exp[0:-margin or None,
|
||||
margin:]
|
||||
elif (i_batch == 0 and
|
||||
j_batch != 0 and
|
||||
j_batch != nyf - 1):
|
||||
inbox = np.index_exp[margin:-margin or None,
|
||||
0:-margin or None]
|
||||
elif (i_batch == nxf - 1 and
|
||||
j_batch != 0 and
|
||||
j_batch != nyf - 1):
|
||||
inbox = np.index_exp[margin:-margin or None,
|
||||
margin:]
|
||||
elif (i_batch != 0 and
|
||||
i_batch != nxf - 1 and
|
||||
j_batch == 0):
|
||||
inbox = np.index_exp[0:-margin or None,
|
||||
margin:-margin or None]
|
||||
elif (i_batch != 0 and
|
||||
i_batch != nxf - 1 and
|
||||
j_batch == nyf - 1):
|
||||
inbox = np.index_exp[margin:,
|
||||
margin:-margin or None]
|
||||
else:
|
||||
inbox = np.index_exp[margin:-margin or None,
|
||||
margin:-margin or None]
|
||||
prediction[where][inbox] = seg_in[inbox]
|
||||
confidence[where][inbox] = conf_in[inbox]
|
||||
# rs: why does confidence_matrix only get text-label scores?
|
||||
# should be scores at final argmax
|
||||
if thresholding_for_artificial_class:
|
||||
prediction_true[where][inbox + (1,)] = seg_in_art[inbox]
|
||||
mask_artificial_class[where][inbox] = seg_in_art[inbox]
|
||||
|
||||
indexer_inside_batch += 1
|
||||
|
||||
|
|
@ -1002,29 +985,25 @@ class Eynollah:
|
|||
batch_indexer = 0
|
||||
img_patch[:] = 0
|
||||
|
||||
prediction_true = prediction_true.astype(np.uint8)
|
||||
|
||||
if thresholding_for_artificial_class:
|
||||
kernel_min = np.ones((3, 3), np.uint8)
|
||||
prediction_true[:,:,0][prediction_true[:,:,0]==artificial_class] = 0
|
||||
|
||||
skeleton_art = skeletonize(prediction_true[:,:,1]).astype(np.uint8)
|
||||
skeleton_art = cv2.dilate(skeleton_art, kernel_min, iterations=1)
|
||||
|
||||
prediction_true[:,:,0][skeleton_art==1]=artificial_class
|
||||
seg_mask_label(prediction, mask_artificial_class,
|
||||
label=artificial_class,
|
||||
only=True,
|
||||
skeletonize=True,
|
||||
dilate=3)
|
||||
gc.collect()
|
||||
return prediction, confidence
|
||||
|
||||
|
||||
gc.collect()
|
||||
return prediction_true, confidence_matrix
|
||||
|
||||
def extract_page(self):
|
||||
self.logger.debug("enter extract_page")
|
||||
cont_page = []
|
||||
if not self.ignore_page_extraction:
|
||||
img = np.copy(self.image)#cv2.GaussianBlur(self.image, (5, 5), 0)
|
||||
img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
|
||||
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
|
||||
_, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
##thresh = cv2.dilate(thresh, KERNEL, iterations=3)
|
||||
contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
|
||||
contours, _ = cv2.findContours(prediction, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
if len(contours)>0:
|
||||
cnt_size = np.array([cv2.contourArea(contours[j])
|
||||
|
|
@ -1068,12 +1047,9 @@ class Eynollah:
|
|||
else:
|
||||
img = self.imread()
|
||||
img = cv2.GaussianBlur(img, (5, 5), 0)
|
||||
img_page_prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
|
||||
|
||||
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
|
||||
_, thresh = cv2.threshold(imgray, 0, 255, 0)
|
||||
thresh = cv2.dilate(thresh, KERNEL, iterations=3)
|
||||
contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
prediction = self.do_prediction(False, img, self.model_zoo.get("page"))
|
||||
prediction = cv2.dilate(prediction, KERNEL, iterations=3)
|
||||
contours, _ = cv2.findContours(prediction, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
||||
if len(contours)>0:
|
||||
cnt_size = np.array([cv2.contourArea(contours[j])
|
||||
for j in range(len(contours))])
|
||||
|
|
@ -1122,7 +1098,7 @@ class Eynollah:
|
|||
thresholding_for_heading=thresholding_for_heading)
|
||||
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
|
||||
self.logger.debug("exit extract_text_regions")
|
||||
return prediction_regions, prediction_regions
|
||||
return prediction_regions
|
||||
|
||||
def extract_text_regions(self, img, patches, cols):
|
||||
self.logger.debug("enter extract_text_regions")
|
||||
|
|
@ -1133,7 +1109,7 @@ class Eynollah:
|
|||
prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1)
|
||||
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
|
||||
self.logger.debug("exit extract_text_regions")
|
||||
return prediction_regions, None
|
||||
return prediction_regions
|
||||
|
||||
def get_textlines_of_a_textregion_sorted(self, textlines_textregion, cx_textline, cy_textline, w_h_textline):
|
||||
N = len(cy_textline)
|
||||
|
|
@ -1280,26 +1256,15 @@ class Eynollah:
|
|||
threshold_art_class=self.threshold_art_class_textline)
|
||||
|
||||
prediction_textline = resize_image(prediction_textline, img_h, img_w)
|
||||
textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1
|
||||
|
||||
old_art = np.copy(textline_mask_tot_ea_art)
|
||||
#prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline"))
|
||||
#prediction_textline_longshot = resize_image(prediction_textline_longshot, img_h, img_w)
|
||||
|
||||
textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1
|
||||
textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8')
|
||||
|
||||
prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1
|
||||
|
||||
#cv2.imwrite('prediction_textline2.png', prediction_textline[:,:,0])
|
||||
|
||||
prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline"))
|
||||
prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w)
|
||||
|
||||
|
||||
#cv2.imwrite('prediction_textline.png', prediction_textline[:,:,0])
|
||||
#sys.exit()
|
||||
self.logger.debug('exit textline_contours')
|
||||
return ((prediction_textline[:, :, 0]==1).astype(np.uint8),
|
||||
(prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8))
|
||||
return ((prediction_textline==1).astype(np.uint8),
|
||||
#(prediction_textline_longshot==1).astype(np.uint8),
|
||||
None
|
||||
)
|
||||
|
||||
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
|
||||
self.logger.debug("enter get_regions_light_v")
|
||||
|
|
@ -1334,7 +1299,6 @@ class Eynollah:
|
|||
###prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get_model("binarization"), n_batch_inference=5)
|
||||
|
||||
####print("inside bin ", time.time()-t_bin)
|
||||
###prediction_bin=prediction_bin[:,:,0]
|
||||
###prediction_bin = (prediction_bin[:,:]==0)*1
|
||||
###prediction_bin = prediction_bin*255
|
||||
|
||||
|
|
@ -1374,7 +1338,7 @@ class Eynollah:
|
|||
thresholding_for_artificial_class=True,
|
||||
threshold_art_class=self.threshold_art_class_layout)
|
||||
else:
|
||||
prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3))
|
||||
prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
|
||||
confidence_matrix = np.zeros((self.image_org.shape[0], self.image_org.shape[1]))
|
||||
prediction_regions_page, confidence_matrix_page = self.do_prediction_new_concept(
|
||||
False, self.image_page_org_size, self.model_zoo.get("region_1_2"), n_batch_inference=1,
|
||||
|
|
@ -1398,13 +1362,12 @@ class Eynollah:
|
|||
###n_batch_inference=3,
|
||||
###thresholding_for_some_classes=True)
|
||||
#print("inside 3 ", time.time()-t_in)
|
||||
#plt.imshow(prediction_regions_org[:,:,0])
|
||||
#plt.imshow(prediction_regions_org[:,:])
|
||||
#plt.show()
|
||||
|
||||
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
|
||||
confidence_matrix = resize_image(confidence_matrix, img_height_h, img_width_h )
|
||||
img_bin = resize_image(img_bin, img_height_h, img_width_h )
|
||||
prediction_regions_org=prediction_regions_org[:,:,0]
|
||||
|
||||
mask_seps_only = (prediction_regions_org[:,:] == 3)*1
|
||||
mask_texts_only = (prediction_regions_org[:,:] ==1)*1
|
||||
|
|
@ -1795,7 +1758,7 @@ class Eynollah:
|
|||
patches = False
|
||||
prediction_table, _ = self.do_prediction_new_concept(patches, img, self.model_zoo.get("table"))
|
||||
prediction_table = prediction_table.astype(np.int16)
|
||||
return prediction_table[:,:,0]
|
||||
return prediction_table
|
||||
|
||||
def run_graphics_and_columns_light(
|
||||
self, text_regions_p_1, textline_mask_tot_ea,
|
||||
|
|
@ -1972,12 +1935,12 @@ class Eynollah:
|
|||
textline_mask_tot_d = rotate_image(textline_mask_tot, slope_deskew)
|
||||
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
|
||||
table_prediction_n = rotate_image(table_prediction, slope_deskew)
|
||||
regions_without_separators_d = (text_regions_p_d[:, :] == 1) * 1
|
||||
regions_without_separators_d = (text_regions_p_d == 1) * 1
|
||||
if self.tables:
|
||||
regions_without_separators_d[table_prediction_n[:,:] == 1] = 1
|
||||
regions_without_separators = (text_regions_p[:, :] == 1) * 1
|
||||
# ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1
|
||||
#self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions)
|
||||
regions_without_separators_d[table_prediction_n == 1] = 1
|
||||
regions_without_separators = (text_regions_p == 1) * 1
|
||||
# ( (text_regions_p==1) | (text_regions_p==2) )*1
|
||||
#self.return_regions_without_separators_new(text_regions_p,img_only_regions)
|
||||
#print(time.time()-t_0_box,'time box in 1')
|
||||
if self.tables:
|
||||
regions_without_separators[table_prediction ==1 ] = 1
|
||||
|
|
@ -1999,10 +1962,10 @@ class Eynollah:
|
|||
if not erosion_hurts:
|
||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||
regions_without_separators = regions_without_separators.astype(np.uint8)
|
||||
regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6)
|
||||
regions_without_separators = cv2.erode(regions_without_separators, KERNEL, iterations=6)
|
||||
else:
|
||||
regions_without_separators_d = regions_without_separators_d.astype(np.uint8)
|
||||
regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6)
|
||||
regions_without_separators_d = cv2.erode(regions_without_separators_d, KERNEL, iterations=6)
|
||||
#print(time.time()-t_0_box,'time box in 3')
|
||||
t1 = time.time()
|
||||
if np.abs(slope_deskew) < SLOPE_THRESHOLD:
|
||||
|
|
@ -2026,14 +1989,11 @@ class Eynollah:
|
|||
if self.tables:
|
||||
text_regions_p[table_prediction == 1] = 10
|
||||
img_revised_tab = text_regions_p[:, :]
|
||||
else:
|
||||
img_revised_tab = text_regions_p[:,:]
|
||||
#img_revised_tab = text_regions_p[:, :]
|
||||
polygons_of_images = return_contours_of_interested_region(text_regions_p, 2)
|
||||
|
||||
label_marginalia = 4
|
||||
min_area_mar = 0.00001
|
||||
marginal_mask = (text_regions_p[:,:]==label_marginalia)*1
|
||||
marginal_mask = (text_regions_p==label_marginalia)*1
|
||||
marginal_mask = marginal_mask.astype('uint8')
|
||||
marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2)
|
||||
|
||||
|
|
@ -2093,7 +2053,7 @@ class Eynollah:
|
|||
|
||||
image_page = image_page.astype(np.uint8)
|
||||
#print("full inside 1", time.time()- t_full0)
|
||||
regions_fully, regions_fully_only_drop = self.extract_text_regions_new(
|
||||
regions_fully = self.extract_text_regions_new(
|
||||
img_bin_light,
|
||||
False, cols=num_col_classifier)
|
||||
#print("full inside 2", time.time()- t_full0)
|
||||
|
|
@ -2103,36 +2063,33 @@ class Eynollah:
|
|||
|
||||
# the separators in full layout will not be written on layout
|
||||
if not self.reading_order_machine_based:
|
||||
text_regions_p[:,:][regions_fully[:,:,0]==5]=6
|
||||
###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4
|
||||
text_regions_p[regions_fully==5]=6
|
||||
|
||||
#text_regions_p[:,:][regions_fully[:,:,0]==6]=6
|
||||
##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p)
|
||||
##regions_fully[:, :, 0][regions_fully_only_drop[:, :] == 4] = 4
|
||||
#text_regions_p[:,:][regions_fully[:,:]==6]=6
|
||||
drop_capital_label_in_full_layout_model = 3
|
||||
|
||||
drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1
|
||||
drops = (regions_fully==drop_capital_label_in_full_layout_model)*1
|
||||
drops= drops.astype(np.uint8)
|
||||
|
||||
regions_fully[:,:,0][regions_fully[:,:,0]==drop_capital_label_in_full_layout_model] = 1
|
||||
regions_fully[regions_fully==drop_capital_label_in_full_layout_model] = 1
|
||||
|
||||
drops = cv2.erode(drops[:,:], KERNEL, iterations=1)
|
||||
regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model
|
||||
drops = cv2.erode(drops, KERNEL, iterations=1)
|
||||
regions_fully[drops==1] = drop_capital_label_in_full_layout_model
|
||||
|
||||
regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(
|
||||
regions_fully, drop_capital_label_in_full_layout_model, text_regions_p)
|
||||
##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier)
|
||||
##regions_fully_np = self.extract_text_regions(image_page, False, cols=num_col_classifier)
|
||||
##if num_col_classifier > 2:
|
||||
##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0
|
||||
##regions_fully_np[regions_fully_np == 4] = 0
|
||||
##else:
|
||||
##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p)
|
||||
|
||||
###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully,
|
||||
### regions_fully_np, img_only_regions)
|
||||
# plt.imshow(regions_fully[:,:,0])
|
||||
# plt.imshow(regions_fully)
|
||||
# plt.show()
|
||||
text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4
|
||||
####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
|
||||
text_regions_p[regions_fully == drop_capital_label_in_full_layout_model] = 4
|
||||
####text_regions_p[regions_fully_np == 4] = 4
|
||||
#plt.imshow(text_regions_p)
|
||||
#plt.show()
|
||||
####if not self.tables:
|
||||
|
|
@ -2141,14 +2098,14 @@ class Eynollah:
|
|||
text_regions_p_d = rotate_image(text_regions_p, slope_deskew)
|
||||
regions_fully_n = rotate_image(regions_fully, slope_deskew)
|
||||
if not self.tables:
|
||||
regions_without_separators_d = (text_regions_p_d[:, :] == 1) * 1
|
||||
regions_without_separators_d = (text_regions_p_d == 1) * 1
|
||||
else:
|
||||
text_regions_p_d = None
|
||||
textline_mask_tot_d = None
|
||||
regions_without_separators_d = None
|
||||
if not self.tables:
|
||||
regions_without_separators = (text_regions_p[:, :] == 1) * 1
|
||||
img_revised_tab = np.copy(text_regions_p[:, :])
|
||||
regions_without_separators = (text_regions_p == 1) * 1
|
||||
img_revised_tab = np.copy(text_regions_p)
|
||||
polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5)
|
||||
|
||||
self.logger.debug('exit run_boxes_full_layout')
|
||||
|
|
@ -2853,7 +2810,7 @@ class Eynollah:
|
|||
textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0
|
||||
|
||||
|
||||
text_only = (img_revised_tab[:, :] == 1) * 1
|
||||
text_only = (img_revised_tab == 1) * 1
|
||||
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
|
||||
text_only_d = ((text_regions_p_d[:, :] == 1)) * 1
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ from shapely import geometry
|
|||
import cv2
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
from skimage import morphology
|
||||
|
||||
from .is_nan import isNaN
|
||||
from .contour import (contours_in_same_horizon,
|
||||
|
|
@ -220,20 +221,20 @@ def find_features_of_lines(contours_main):
|
|||
def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregion_pre_np, img_only_text):
|
||||
textregion_pre_p_org = np.copy(textregion_pre_p)
|
||||
# 4 is drop capitals
|
||||
headers_in_longshot = textregion_pre_np[:, :, 0] == 2
|
||||
#headers_in_longshot = ((textregion_pre_np[:,:,0]==2) |
|
||||
# (textregion_pre_np[:,:,0]==1))
|
||||
textregion_pre_p[:, :, 0][headers_in_longshot &
|
||||
(textregion_pre_p[:, :, 0] != 4)] = 2
|
||||
textregion_pre_p[:, :, 0][textregion_pre_p[:, :, 0] == 1] = 0
|
||||
headers_in_longshot = textregion_pre_np == 2
|
||||
#headers_in_longshot = ((textregion_pre_np==2) |
|
||||
# (textregion_pre_np==1))
|
||||
textregion_pre_p[headers_in_longshot &
|
||||
(textregion_pre_p != 4)] = 2
|
||||
textregion_pre_p[textregion_pre_p == 1] = 0
|
||||
# previously it was done as below, but that way the drop capitals were deleted as well
|
||||
# textregion_pre_p[:,:,0][(img_only_text[:,:]==1) &
|
||||
# (textregion_pre_p[:,:,0]!=7) &
|
||||
# (textregion_pre_p[:,:,0]!=2)] = 1
|
||||
textregion_pre_p[:, :, 0][(img_only_text[:, :] == 1) &
|
||||
(textregion_pre_p[:, :, 0] != 7) &
|
||||
(textregion_pre_p[:, :, 0] != 4) &
|
||||
(textregion_pre_p[:, :, 0] != 2)] = 1
|
||||
# textregion_pre_p[(img_only_text[:,:]==1) &
|
||||
# (textregion_pre_p!=7) &
|
||||
# (textregion_pre_p!=2)] = 1
|
||||
textregion_pre_p[(img_only_text[:, :] == 1) &
|
||||
(textregion_pre_p != 7) &
|
||||
(textregion_pre_p != 4) &
|
||||
(textregion_pre_p != 2)] = 1
|
||||
return textregion_pre_p
|
||||
|
||||
def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
|
||||
|
|
@ -754,7 +755,7 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1):
|
|||
return layout_no_patch
|
||||
|
||||
def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label, text_regions_p):
|
||||
drop_only = (layout_in_patch[:, :, 0] == drop_capital_label) * 1
|
||||
drop_only = (layout_in_patch == drop_capital_label) * 1
|
||||
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
|
||||
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
|
||||
|
||||
|
|
@ -772,7 +773,6 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
|
|||
for jj in range(len(contours_drop_parent)):
|
||||
x, y, w, h = cv2.boundingRect(contours_drop_parent[jj])
|
||||
box = slice(y, y + h), slice(x, x + w)
|
||||
box0 = box + (0,)
|
||||
mask_of_drop_cpaital_in_early_layout = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1]))
|
||||
mask_of_drop_cpaital_in_early_layout[box] = text_regions_p[box]
|
||||
|
||||
|
|
@ -783,12 +783,12 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop
|
|||
percent_text_to_all_in_drop = all_drop_capital_pixels_which_is_text_in_early_lo / float(all_drop_capital_pixels)
|
||||
if (areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) / float(w * h) > 0.6 and
|
||||
percent_text_to_all_in_drop >= 0.3):
|
||||
layout_in_patch[box0] = drop_capital_label
|
||||
layout_in_patch[box] = drop_capital_label
|
||||
else:
|
||||
layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = drop_capital_label
|
||||
layout_in_patch[box0][layout_in_patch[box0] == 0] = drop_capital_label
|
||||
layout_in_patch[box0][layout_in_patch[box0] == 4] = drop_capital_label# images
|
||||
#layout_in_patch[box0][layout_in_patch[box0] == drop_capital_label] = 1#drop_capital_label
|
||||
mask = ((layout_in_patch[box] == drop_capital_label) |
|
||||
(layout_in_patch[box] == 0) |
|
||||
(layout_in_patch[box] == 4))
|
||||
layout_in_patch[box][mask] = drop_capital_label
|
||||
|
||||
return layout_in_patch
|
||||
|
||||
|
|
@ -917,7 +917,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(
|
|||
|
||||
all_pixels = (img == 255).sum()
|
||||
pixels_header=((img == 255) &
|
||||
(regions_model_full[:,:,0]==2)).sum()
|
||||
(regions_model_full==2)).sum()
|
||||
pixels_main = all_pixels - pixels_header
|
||||
|
||||
if (( pixels_header / float(pixels_main) >= 0.6 and
|
||||
|
|
@ -1947,3 +1947,38 @@ def ensure_array(obj: Iterable) -> np.ndarray:
|
|||
if not isinstance(obj, np.ndarray):
|
||||
return np.fromiter(obj, object)
|
||||
return obj
|
||||
|
||||
def seg_mask_label(segmap:np.ndarray,
                   mask:np.ndarray,
                   only:bool=False,
                   label:int=2,
                   skeletonize:bool=False,
                   dilate:int=0
) -> None:
    """
    overwrite an existing segmentation map from a binary mask with a given label

    Args:
        segmap: integer array of existing segmentation labels ([H, W] or [B, H, W] shape);
                modified in place (nothing is returned)
        mask: boolean array for specific label (non-boolean input is interpreted
              as "non-zero means set")
    Keyword Args:
        label: the class label to be written
        only: whether to suppress the `label` outside `mask`
        skeletonize: whether to transform the mask to its skeleton
        dilate: whether to also apply dilation after this (convolution with square kernel of given size)

    Use this to enforce specific confidence thresholds or rules after segmentation.

    Note: if `mask` does not contain any set pixel, `segmap` is left completely
    untouched, i.e. `only` does not clear existing `label` pixels in that case.
    """
    # Guard against 0/1 integer masks: indexing with them below would be
    # integer fancy indexing (selecting rows 0 and 1) instead of masking.
    mask = np.asarray(mask).astype(bool, copy=False)
    if not mask.any():
        return
    if only:
        segmap[segmap == label] = 0
    if skeletonize:
        if mask.ndim == 3:
            # Skeletonize each image of the batch separately (a plain call
            # would compute a 3-D skeleton). np.stack needs a real sequence,
            # generators are rejected by NumPy.
            mask = np.stack([morphology.skeletonize(m) for m in mask])
        else:
            mask = morphology.skeletonize(mask)
    if dilate:
        kernel = np.ones((dilate, dilate), np.uint8)
        if mask.ndim == 3:
            # OpenCV treats a 3-D array as (rows, cols, channels), so a
            # [B, H, W] batch must be dilated one [H, W] image at a time.
            mask = np.stack([cv2.dilate(m.astype(np.uint8), kernel, iterations=1)
                             for m in mask]) > 0
        else:
            mask = cv2.dilate(mask.astype(np.uint8), kernel, iterations=1) > 0
    segmap[mask.astype(bool, copy=False)] = label
|
||||
|
|
|
|||
|
|
@ -501,7 +501,7 @@ def adhere_drop_capital_region_into_corresponding_textline(
|
|||
|
||||
def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
|
||||
|
||||
drop_only = (layout_no_patch[:, :, 0] == 4) * 1
|
||||
drop_only = (layout_no_patch == 4) * 1
|
||||
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
|
||||
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
|
||||
|
||||
|
|
@ -529,9 +529,8 @@ def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
|
|||
if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15:
|
||||
contours_drop_parent_final.append(contours_drop_parent[jj])
|
||||
|
||||
layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0
|
||||
|
||||
layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4))
|
||||
layout_no_patch[layout_no_patch == 4] = 0
|
||||
layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=4)
|
||||
|
||||
return layout_no_patch
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue