adding the binarization model and option to binarize input document for the cases like dark, stronly bright and other ones

pull/63/head
vahid 3 years ago
parent 44dad6a072
commit 7cbecadccc

@ -73,6 +73,12 @@ from qurator.eynollah.eynollah import Eynollah
is_flag=True,
help="if this parameter set to true, this tool will try to return all elements of layout.",
)
@click.option(
"--input_binary/--input-RGB",
"-ib/-irgb",
is_flag=True,
help="in general, eynollah uses RGB as input but if the input document is strongly dark, bright or for any other reason you can turn binarized input on. This option does not mean that you have to provide a binary image, otherwise this means that the tool itself will binarized the RGB input document.",
)
@click.option(
"--allow_scaling/--no-allow-scaling",
"-as/-noas",
@ -103,6 +109,7 @@ def main(
allow_enhancement,
curved_line,
full_layout,
input_binary,
allow_scaling,
headers_off,
log_level
@ -128,6 +135,7 @@ def main(
allow_enhancement=allow_enhancement,
curved_line=curved_line,
full_layout=full_layout,
input_binary=input_binary,
allow_scaling=allow_scaling,
headers_off=headers_off,
)

@ -27,6 +27,7 @@ import tensorflow as tf
tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore")
from .utils.contour import (
filter_contours_area_of_image,
find_contours_mean_y_diff,
@ -91,6 +92,7 @@ class Eynollah:
allow_enhancement=False,
curved_line=False,
full_layout=False,
input_binary=False,
allow_scaling=False,
headers_off=False,
override_dpi=None,
@ -108,6 +110,7 @@ class Eynollah:
self.allow_enhancement = allow_enhancement
self.curved_line = curved_line
self.full_layout = full_layout
self.input_binary = input_binary
self.allow_scaling = allow_scaling
self.headers_off = headers_off
self.plotter = None if not enable_plotting else EynollahPlotter(
@ -125,6 +128,7 @@ class Eynollah:
self.dir_models = dir_models
self.model_dir_of_enhancement = dir_models + "/model_enhancement.h5"
self.model_dir_of_binarization = dir_models + "/model_bin_sbb_ens.h5"
self.model_dir_of_col_classifier = dir_models + "/model_scale_classifier.h5"
self.model_region_dir_p = dir_models + "/model_main_covid19_lr5-5_scale_1_1_great.h5"
self.model_region_dir_p2 = dir_models + "/model_main_home_corona3_rot.h5"
@ -133,7 +137,7 @@ class Eynollah:
self.model_page_dir = dir_models + "/model_page_mixed_best.h5"
self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5"
self.model_textline_dir = dir_models + "/model_textline_newspapers.h5"
def _cache_images(self, image_filename=None, image_pil=None):
ret = {}
if image_filename:
@ -309,27 +313,36 @@ class Eynollah:
return img_new, num_column_is_classified
def resize_image_with_column_classifier(self, is_image_enhanced):
def resize_image_with_column_classifier(self, is_image_enhanced, img_bin):
self.logger.debug("enter resize_image_with_column_classifier")
img = self.imread()
if self.input_binary:
img = np.copy(img_bin)
else:
img = self.imread()
_, page_coord = self.early_page_for_num_of_column_classification()
_, page_coord = self.early_page_for_num_of_column_classification(img)
model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier)
if self.input_binary:
img_in = np.copy(img)
img_in = img_in / 255.0
width_early = img_in.shape[1]
img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
img_in = img_in.reshape(1, 448, 448, 3)
else:
img_1ch = self.imread(grayscale=True, uint8=False)
width_early = img_1ch.shape[1]
img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
img_1ch = self.imread(grayscale=True, uint8=False)
width_early = img_1ch.shape[1]
img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
# plt.imshow(img_1ch)
# plt.show()
img_1ch = img_1ch / 255.0
# plt.imshow(img_1ch)
# plt.show()
img_1ch = img_1ch / 255.0
img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
img_in[0, :, :, 0] = img_1ch[:, :]
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]
img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
img_in[0, :, :, 0] = img_1ch[:, :]
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]
label_p_pred = model_num_classifier.predict(img_in)
num_col = np.argmax(label_p_pred[0]) + 1
@ -358,24 +371,51 @@ class Eynollah:
self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
dpi = self.dpi
self.logger.info("Detected %s DPI", dpi)
img = self.imread()
if self.input_binary:
img = self.imread()
model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization)
prediction_bin = self.do_prediction(True, img, model_bin)
prediction_bin=prediction_bin[:,:,0]
prediction_bin = (prediction_bin[:,:]==0)*1
prediction_bin = prediction_bin*255
prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
session_bin.close()
del model_bin
del session_bin
gc.collect()
prediction_bin = prediction_bin.astype(np.uint8)
img= np.copy(prediction_bin)
img_bin = np.copy(prediction_bin)
else:
img = self.imread()
img_bin = None
_, page_coord = self.early_page_for_num_of_column_classification()
_, page_coord = self.early_page_for_num_of_column_classification(img_bin)
model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier)
img_1ch = self.imread(grayscale=True)
width_early = img_1ch.shape[1]
img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
# plt.imshow(img_1ch)
# plt.show()
img_1ch = img_1ch / 255.0
img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
img_in[0, :, :, 0] = img_1ch[:, :]
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]
# plt.imshow(img_in[0,:,:,:])
# plt.show()
if self.input_binary:
img_in = np.copy(img)
width_early = img_in.shape[1]
img_in = img_in / 255.0
img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
img_in = img_in.reshape(1, 448, 448, 3)
else:
img_1ch = self.imread(grayscale=True)
width_early = img_1ch.shape[1]
img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
img_1ch = img_1ch / 255.0
img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
img_in[0, :, :, 0] = img_1ch[:, :]
img_in[0, :, :, 1] = img_1ch[:, :]
img_in[0, :, :, 2] = img_1ch[:, :]
label_p_pred = model_num_classifier.predict(img_in)
num_col = np.argmax(label_p_pred[0]) + 1
@ -396,7 +436,7 @@ class Eynollah:
self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
return is_image_enhanced, img, image_res, num_col, num_column_is_classified
return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin
# pylint: disable=attribute-defined-outside-init
def get_image_and_scales(self, img_org, img_res, scale):
@ -587,9 +627,13 @@ class Eynollah:
gc.collect()
return prediction_true
def early_page_for_num_of_column_classification(self):
def early_page_for_num_of_column_classification(self,img_bin):
self.logger.debug("enter early_page_for_num_of_column_classification")
img = self.imread()
if self.input_binary:
img =np.copy(img_bin)
img = img.astype(np.uint8)
else:
img = self.imread()
model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
img = cv2.GaussianBlur(img, (5, 5), 0)
@ -1149,6 +1193,8 @@ class Eynollah:
self.logger.info("ratio_of_two_models: %s", rate_two_models)
if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD):
prediction_regions_org = np.copy(prediction_regions_org_copy)
prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3
mask_lines_only=(prediction_regions_org[:,:]==3)*1
@ -1158,6 +1204,47 @@ class Eynollah:
#plt.show()
prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2)
if rate_two_models<=40:
if self.input_binary:
prediction_bin = np.copy(img_org)
else:
model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization)
prediction_bin = self.do_prediction(True, img_org, model_bin)
prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
prediction_bin=prediction_bin[:,:,0]
prediction_bin = (prediction_bin[:,:]==0)*1
prediction_bin = prediction_bin*255
prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
session_bin.close()
del model_bin
del session_bin
gc.collect()
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)
ratio_y=1
ratio_x=1
img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
prediction_regions_org = self.do_prediction(True, img, model_region)
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0]
mask_lines_only=(prediction_regions_org[:,:]==3)*1
session_region.close()
del model_region
del session_region
gc.collect()
mask_texts_only=(prediction_regions_org[:,:]==1)*1
mask_images_only=(prediction_regions_org[:,:]==2)*1
@ -1170,26 +1257,71 @@ class Eynollah:
text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1))
K.clear_session()
return text_regions_p_true, erosion_hurts
except:
img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
if self.input_binary:
prediction_bin = np.copy(img_org)
else:
session_region.close()
del model_region
del session_region
gc.collect()
model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization)
prediction_bin = self.do_prediction(True, img_org, model_bin)
prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h )
prediction_bin=prediction_bin[:,:,0]
prediction_bin = (prediction_bin[:,:]==0)*1
prediction_bin = prediction_bin*255
prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
session_bin.close()
del model_bin
del session_bin
gc.collect()
prediction_regions_org = self.do_prediction(True, img, model_region)
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org = prediction_regions_org[:,:,0]
model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)
ratio_y=1
ratio_x=1
img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
prediction_regions_org = self.do_prediction(True, img, model_region)
prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
prediction_regions_org=prediction_regions_org[:,:,0]
prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
#mask_lines_only=(prediction_regions_org[:,:]==3)*1
session_region.close()
del model_region
del session_region
gc.collect()
#img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
#prediction_regions_org = self.do_prediction(True, img, model_region)
#prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h )
#prediction_regions_org = prediction_regions_org[:,:,0]
#prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
#session_region.close()
#del model_region
#del session_region
#gc.collect()
@ -1506,7 +1638,7 @@ class Eynollah:
def run_enhancement(self):
self.logger.info("resize and enhance image")
is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified = self.resize_and_enhance_image_with_column_classifier()
is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier()
self.logger.info("Image is %senhanced", '' if is_image_enhanced else 'not ')
K.clear_session()
scale = 1
@ -1522,7 +1654,7 @@ class Eynollah:
else:
self.get_image_and_scales(img_org, img_res, scale)
if self.allow_scaling:
img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced)
img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin)
self.get_image_and_scales_after_enhancing(img_org, img_res)
return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified
@ -1688,13 +1820,10 @@ class Eynollah:
t0 = time.time()
img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement()
self.logger.info("Enhancing took %ss ", str(time.time() - t0))
t1 = time.time()
text_regions_p_1 ,erosion_hurts = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier)
self.logger.info("Textregion detection took %ss ", str(time.time() - t1))
t1 = time.time()

@ -1595,13 +1595,12 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
try:
if erosion_hurts:
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=3.)
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=6.)
else:
num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.)
except:
peaks_neg_fin=[]
print(peaks_neg_fin,'peaks_neg_fin0')
try:
peaks_neg_fin_org=np.copy(peaks_neg_fin)

@ -64,13 +64,14 @@ class EynollahXmlWriter():
for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])):
if not self.curved_line:
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
else:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0])/self.scale_y))
textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) )
textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) )
points_co += str(textline_x_coord)
points_co += ','
points_co += str(textline_y_coord)
if self.curved_line and np.abs(slopes_marginals[marginal_idx]) <= 45:
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))

Loading…
Cancel
Save