Mirror of https://github.com/qurator-spk/eynollah.git
Add the binarization model and an option to binarize the input document for cases such as dark, strongly bright, or otherwise degraded originals
Parent: 44dad6a072
Commit: 7cbecadccc
4 changed files with 191 additions and 54 deletions
@@ -73,6 +73,12 @@ from qurator.eynollah.eynollah import Eynollah
     is_flag=True,
     help="if this parameter set to true, this tool will try to return all elements of layout.",
 )
+@click.option(
+    "--input_binary/--input-RGB",
+    "-ib/-irgb",
+    is_flag=True,
+    help="In general, eynollah uses RGB input, but for documents that are strongly dark, overly bright, or degraded for any other reason, binarized input can be turned on. This does not mean a binary image has to be provided; rather, the tool itself binarizes the RGB input document.",
+)
 @click.option(
     "--allow_scaling/--no-allow-scaling",
     "-as/-noas",
@@ -103,6 +109,7 @@ def main(
     allow_enhancement,
     curved_line,
     full_layout,
+    input_binary,
     allow_scaling,
     headers_off,
     log_level
@@ -128,6 +135,7 @@ def main(
         allow_enhancement=allow_enhancement,
         curved_line=curved_line,
         full_layout=full_layout,
+        input_binary=input_binary,
         allow_scaling=allow_scaling,
         headers_off=headers_off,
     )
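Taken together, the three CLI hunks declare the new option as a paired click flag, add it to main()'s parameters, and forward it to the Eynollah constructor. A minimal, standalone sketch of the paired-flag pattern used here (the command and echo are illustrative, not part of the eynollah CLI):

    import click

    @click.command()
    @click.option("--input_binary/--input-RGB", "-ib/-irgb", is_flag=True,
                  help="Binarize the RGB input before layout analysis.")
    def demo(input_binary):
        # input_binary arrives as a plain bool: True for -ib/--input_binary,
        # False for -irgb/--input-RGB (the default).
        click.echo(f"input_binary={input_binary}")

    if __name__ == "__main__":
        demo()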
@@ -27,6 +27,7 @@ import tensorflow as tf
 tf.get_logger().setLevel("ERROR")
 warnings.filterwarnings("ignore")
+

 from .utils.contour import (
     filter_contours_area_of_image,
     find_contours_mean_y_diff,
@@ -91,6 +92,7 @@ class Eynollah:
         allow_enhancement=False,
         curved_line=False,
         full_layout=False,
+        input_binary=False,
         allow_scaling=False,
         headers_off=False,
         override_dpi=None,
@@ -108,6 +110,7 @@ class Eynollah:
         self.allow_enhancement = allow_enhancement
         self.curved_line = curved_line
         self.full_layout = full_layout
+        self.input_binary = input_binary
         self.allow_scaling = allow_scaling
         self.headers_off = headers_off
         self.plotter = None if not enable_plotting else EynollahPlotter(
@@ -125,6 +128,7 @@ class Eynollah:
         self.dir_models = dir_models

         self.model_dir_of_enhancement = dir_models + "/model_enhancement.h5"
+        self.model_dir_of_binarization = dir_models + "/model_bin_sbb_ens.h5"
         self.model_dir_of_col_classifier = dir_models + "/model_scale_classifier.h5"
         self.model_region_dir_p = dir_models + "/model_main_covid19_lr5-5_scale_1_1_great.h5"
         self.model_region_dir_p2 = dir_models + "/model_main_home_corona3_rot.h5"
@@ -309,13 +313,22 @@ class Eynollah:

         return img_new, num_column_is_classified

-    def resize_image_with_column_classifier(self, is_image_enhanced):
+    def resize_image_with_column_classifier(self, is_image_enhanced, img_bin):
         self.logger.debug("enter resize_image_with_column_classifier")
+        if self.input_binary:
+            img = np.copy(img_bin)
+        else:
             img = self.imread()

-        _, page_coord = self.early_page_for_num_of_column_classification()
+        _, page_coord = self.early_page_for_num_of_column_classification(img)
         model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier)
+        if self.input_binary:
+            img_in = np.copy(img)
+            img_in = img_in / 255.0
+            width_early = img_in.shape[1]
+            img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
+            img_in = img_in.reshape(1, 448, 448, 3)
+        else:
             img_1ch = self.imread(grayscale=True, uint8=False)
             width_early = img_1ch.shape[1]
             img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
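Whichever branch is taken, the column-count classifier receives one fixed-size sample: a 448x448, 3-channel image scaled to [0, 1] with a leading batch dimension. A small standalone sketch of that preparation step (the function name and the explicit float conversion are illustrative):

    import cv2
    import numpy as np

    def prepare_column_classifier_input(img):
        """Resize a 3-channel page image (RGB or binarized) to the classifier's
        fixed 448x448 input and add a batch dimension, as in the hunk above."""
        img_in = img.astype(np.float64) / 255.0                                # scale pixel values to [0, 1]
        img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
        return img_in.reshape(1, 448, 448, 3)                                  # shape (1, 448, 448, 3)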
@@ -358,15 +371,43 @@ class Eynollah:
         self.logger.debug("enter resize_and_enhance_image_with_column_classifier")
         dpi = self.dpi
         self.logger.info("Detected %s DPI", dpi)
+        if self.input_binary:
             img = self.imread()
+            model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization)
+            prediction_bin = self.do_prediction(True, img, model_bin)
+
+            prediction_bin = prediction_bin[:, :, 0]
+            prediction_bin = (prediction_bin[:, :] == 0) * 1
+            prediction_bin = prediction_bin * 255
+
+            prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
+
+            session_bin.close()
+            del model_bin
+            del session_bin
+            gc.collect()
+
+            prediction_bin = prediction_bin.astype(np.uint8)
+            img = np.copy(prediction_bin)
+            img_bin = np.copy(prediction_bin)
+        else:
+            img = self.imread()
+            img_bin = None
+
-        _, page_coord = self.early_page_for_num_of_column_classification()
+        _, page_coord = self.early_page_for_num_of_column_classification(img_bin)
         model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier)
+
+        if self.input_binary:
+            img_in = np.copy(img)
+            width_early = img_in.shape[1]
+            img_in = img_in / 255.0
+            img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST)
+            img_in = img_in.reshape(1, 448, 448, 3)
+        else:
             img_1ch = self.imread(grayscale=True)
             width_early = img_1ch.shape[1]
             img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
-        # plt.imshow(img_1ch)
-        # plt.show()
             img_1ch = img_1ch / 255.0
             img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST)
             img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3))
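This hunk is where the optional model-based binarization actually happens: the binarization model's prediction is reduced to a single channel, pixels predicted as class 0 are mapped to white (255) and everything else to black (0), and the result is replicated to three channels so the rest of the pipeline can keep treating it like an RGB image. A small standalone sketch of that conversion (assuming, as the code does, that label 0 corresponds to background):

    import numpy as np

    def prediction_to_binarized_image(prediction_bin):
        """Turn an (H, W, C) label map from the binarization model into a
        3-channel uint8 image: label 0 -> 255 (background), others -> 0 (ink)."""
        labels = prediction_bin[:, :, 0]                              # keep a single channel
        binary = (labels == 0) * 255                                  # white background, black foreground
        binary = np.repeat(binary[:, :, np.newaxis], 3, axis=2)       # replicate to 3 channels
        return binary.astype(np.uint8)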
@@ -374,8 +415,7 @@ class Eynollah:
             img_in[0, :, :, 1] = img_1ch[:, :]
             img_in[0, :, :, 2] = img_1ch[:, :]

-        # plt.imshow(img_in[0,:,:,:])
-        # plt.show()

         label_p_pred = model_num_classifier.predict(img_in)
         num_col = np.argmax(label_p_pred[0]) + 1
@@ -396,7 +436,7 @@ class Eynollah:


         self.logger.debug("exit resize_and_enhance_image_with_column_classifier")
-        return is_image_enhanced, img, image_res, num_col, num_column_is_classified
+        return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin

     # pylint: disable=attribute-defined-outside-init
     def get_image_and_scales(self, img_org, img_res, scale):
@@ -587,8 +627,12 @@ class Eynollah:
         gc.collect()
         return prediction_true

-    def early_page_for_num_of_column_classification(self):
+    def early_page_for_num_of_column_classification(self, img_bin):
         self.logger.debug("enter early_page_for_num_of_column_classification")
+        if self.input_binary:
+            img = np.copy(img_bin)
+            img = img.astype(np.uint8)
+        else:
             img = self.imread()
         model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
         img = cv2.GaussianBlur(img, (5, 5), 0)
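Page-border detection now reuses the already binarized array instead of re-reading the original file when binarized input is active; either way, the image is blurred before the page model runs. A small sketch of that selection step (the function name and read_image stand-in are illustrative, not names from the codebase):

    import cv2
    import numpy as np

    def select_page_detection_input(input_binary, img_bin, read_image):
        """Pick the binarized array when input_binary is set, otherwise read the
        original image; blur it as done before page detection in the hunk above."""
        img = np.copy(img_bin).astype(np.uint8) if input_binary else read_image()
        return cv2.GaussianBlur(img, (5, 5), 0)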
@@ -1150,6 +1194,8 @@ class Eynollah:
             if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD):
                 prediction_regions_org = np.copy(prediction_regions_org_copy)
+
+

             prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)] = 3
             mask_lines_only = (prediction_regions_org[:,:]==3)*1
             prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2)
@@ -1158,6 +1204,47 @@ class Eynollah:
             #plt.show()

             prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2)

+            if rate_two_models <= 40:
+                if self.input_binary:
+                    prediction_bin = np.copy(img_org)
+                else:
+                    model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization)
+                    prediction_bin = self.do_prediction(True, img_org, model_bin)
+                    prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h)
+
+                    prediction_bin = prediction_bin[:, :, 0]
+                    prediction_bin = (prediction_bin[:, :] == 0) * 1
+                    prediction_bin = prediction_bin * 255
+
+                    prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
+
+                    session_bin.close()
+                    del model_bin
+                    del session_bin
+                    gc.collect()
+
+                model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)
+                ratio_y = 1
+                ratio_x = 1
+
+                img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
+
+                prediction_regions_org = self.do_prediction(True, img, model_region)
+                prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
+                prediction_regions_org = prediction_regions_org[:, :, 0]
+
+                mask_lines_only = (prediction_regions_org[:, :] == 3) * 1
+                session_region.close()
+                del model_region
+                del session_region
+                gc.collect()
+
             mask_texts_only = (prediction_regions_org[:, :] == 1) * 1
             mask_images_only = (prediction_regions_org[:, :] == 2) * 1

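Going by the mask names, the region model's output is a label map in which 1 marks text, 2 marks images, and 3 marks (separator) lines; the masks above are simple equality tests against those labels. A standalone sketch of the same mask construction:

    import numpy as np

    def split_region_labels(prediction_regions):
        """Split the region model's label map into binary masks
        (1 = text, 2 = image, 3 = separator line), mirroring the diff."""
        labels = prediction_regions[:, :, 0] if prediction_regions.ndim == 3 else prediction_regions
        mask_texts_only = (labels == 1).astype(np.uint8)
        mask_images_only = (labels == 2).astype(np.uint8)
        mask_lines_only = (labels == 3).astype(np.uint8)
        return mask_texts_only, mask_images_only, mask_lines_only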
@@ -1176,20 +1263,65 @@ class Eynollah:
             return text_regions_p_true, erosion_hurts
         except:

-            img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
-            prediction_regions_org = self.do_prediction(True, img, model_region)
-            prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
-            prediction_regions_org = prediction_regions_org[:, :, 0]
-            prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)] = 0
+            if self.input_binary:
+                prediction_bin = np.copy(img_org)
+            else:
                 session_region.close()
                 del model_region
                 del session_region
                 gc.collect()
+
+                model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization)
+                prediction_bin = self.do_prediction(True, img_org, model_bin)
+                prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h)
+                prediction_bin = prediction_bin[:, :, 0]
+
+                prediction_bin = (prediction_bin[:, :] == 0) * 1
+
+                prediction_bin = prediction_bin * 255
+
+                prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
+
+                session_bin.close()
+                del model_bin
+                del session_bin
+                gc.collect()
+
+            model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens)
+            ratio_y = 1
+            ratio_x = 1
+
+            img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x))
+
+            prediction_regions_org = self.do_prediction(True, img, model_region)
+            prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
+            prediction_regions_org = prediction_regions_org[:, :, 0]
+
+            #mask_lines_only=(prediction_regions_org[:,:]==3)*1
+            session_region.close()
+            del model_region
+            del session_region
+            gc.collect()
+
+            #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1))
+            #prediction_regions_org = self.do_prediction(True, img, model_region)
+            #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h)
+            #prediction_regions_org = prediction_regions_org[:,:,0]
+            #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0
+            #session_region.close()
+            #del model_region
+            #del session_region
+            #gc.collect()
@@ -1506,7 +1638,7 @@ class Eynollah:

     def run_enhancement(self):
         self.logger.info("resize and enhance image")
-        is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified = self.resize_and_enhance_image_with_column_classifier()
+        is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier()
         self.logger.info("Image is %senhanced", '' if is_image_enhanced else 'not ')
         K.clear_session()
         scale = 1
@@ -1522,7 +1654,7 @@ class Eynollah:
         else:
             self.get_image_and_scales(img_org, img_res, scale)
             if self.allow_scaling:
-                img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced)
+                img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin)
                 self.get_image_and_scales_after_enhancing(img_org, img_res)
         return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified

@@ -1688,13 +1820,10 @@ class Eynollah:
         t0 = time.time()
         img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement()
-
-
-
         self.logger.info("Enhancing took %ss ", str(time.time() - t0))

         t1 = time.time()
         text_regions_p_1, erosion_hurts = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier)

         self.logger.info("Textregion detection took %ss ", str(time.time() - t1))

         t1 = time.time()
@@ -1595,13 +1595,12 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators

         try:
             if erosion_hurts:
-                num_col, peaks_neg_fin = find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], multiplier=3.)
+                num_col, peaks_neg_fin = find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], multiplier=6.)
             else:
                 num_col, peaks_neg_fin = find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], multiplier=7.)
         except:
             peaks_neg_fin = []

-        print(peaks_neg_fin, 'peaks_neg_fin0')

         try:
             peaks_neg_fin_org = np.copy(peaks_neg_fin)
@@ -64,13 +64,14 @@ class EynollahXmlWriter():
             for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])):
                 if not self.curved_line:
                     if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
-                        points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
-                        points_co += ','
-                        points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
+                        textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
+                        textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
                     else:
-                        points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
+                        textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
+                        textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
+                    points_co += str(textline_x_coord)
                     points_co += ','
-                    points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
+                    points_co += str(textline_y_coord)
                 if self.curved_line and np.abs(slopes_marginals[marginal_idx]) <= 45:
                     if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
                         points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))
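In the writer, the marginal textline coordinates are now computed once, clamped with max(0, ...) so rounding and offset corrections cannot produce negative PAGE-XML coordinates, and only then appended to the points string. A minimal sketch of that clamping step (the helper and argument names are illustrative, not taken from writer.py):

    def clamped_point(x, y, box_offset_x, box_offset_y, page_offset_x, page_offset_y, scale_x, scale_y):
        """Shift a polygon point by its box and page offsets, rescale it, and
        clamp to non-negative integers, as the diff above does per coordinate."""
        textline_x_coord = max(0, int((x + box_offset_x + page_offset_x) / scale_x))
        textline_y_coord = max(0, int((y + box_offset_y + page_offset_y) / scale_y))
        return f"{textline_x_coord},{textline_y_coord}"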