From 583cdcee2cb20f9e8de38213112be07a5b2a4c15 Mon Sep 17 00:00:00 2001 From: vahid Date: Tue, 13 Sep 2022 15:07:00 +0200 Subject: [PATCH] new (hybrid cnn+transformer) textline model which can accelerate to extract contour textlines faster --- qurator/eynollah/cli.py | 10 +++- qurator/eynollah/eynollah.py | 103 ++++++++++++++++++++++++++++++----- qurator/eynollah/writer.py | 15 ++--- requirements.txt | 12 ++-- 4 files changed, 113 insertions(+), 27 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 6828cc5..ddf986e 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -76,7 +76,13 @@ from qurator.eynollah.eynollah import Eynollah "--curved-line/--no-curvedline", "-cl/-nocl", is_flag=True, - help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectabgle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.", + help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.", +) +@click.option( + "--textline_light/--no-textline_light", + "-tll/-notll", + is_flag=True, + help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline with a faster method.", ) @click.option( "--full-layout/--no-full-layout", @@ -139,6 +145,7 @@ def main( enable_plotting, allow_enhancement, curved_line, + textline_light, full_layout, tables, input_binary, @@ -170,6 +177,7 @@ def main( enable_plotting=enable_plotting, allow_enhancement=allow_enhancement, curved_line=curved_line, + textline_light=textline_light, full_layout=full_layout, tables=tables, input_binary=input_binary, diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c125b1a..8de793c 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -30,6 +30,7 @@ from scipy.signal import find_peaks import matplotlib.pyplot as plt from scipy.ndimage import gaussian_filter1d from keras.backend import set_session +from tensorflow.keras import layers from .utils.contour import ( filter_contours_area_of_image, @@ -83,6 +84,60 @@ DPI_THRESHOLD = 298 MAX_SLOPE = 999 KERNEL = np.ones((5, 5), np.uint8) +projection_dim = 64 +patch_size = 1 +num_patches =21*21#14*14#28*28#14*14#28*28 + + +class Patches(layers.Layer): + def __init__(self, **kwargs): + super(Patches, self).__init__() + self.patch_size = patch_size + + def call(self, images): + batch_size = tf.shape(images)[0] + patches = tf.image.extract_patches( + images=images, + sizes=[1, self.patch_size, self.patch_size, 1], + strides=[1, self.patch_size, self.patch_size, 1], + rates=[1, 1, 1, 1], + padding="VALID", + ) + patch_dims = patches.shape[-1] + patches = tf.reshape(patches, [batch_size, -1, patch_dims]) + return patches + def get_config(self): + + config = super().get_config().copy() + config.update({ + 'patch_size': self.patch_size, + }) + return config + + +class PatchEncoder(layers.Layer): + def __init__(self, **kwargs): + super(PatchEncoder, self).__init__() + self.num_patches = num_patches + self.projection = layers.Dense(units=projection_dim) + self.position_embedding = layers.Embedding( + input_dim=num_patches, output_dim=projection_dim + ) + + def call(self, patch): + positions = tf.range(start=0, limit=self.num_patches, delta=1) + encoded = self.projection(patch) + self.position_embedding(positions) + return encoded + def get_config(self): + + config = super().get_config().copy() + config.update({ + 'num_patches': self.num_patches, + 'projection': self.projection, + 'position_embedding': self.position_embedding, + }) + return config + class Eynollah: def __init__( self, @@ -100,6 +155,7 @@ class Eynollah: enable_plotting=False, allow_enhancement=False, curved_line=False, + textline_light=False, full_layout=False, tables=False, input_binary=False, @@ -130,6 +186,7 @@ class Eynollah: self.enable_plotting = enable_plotting self.allow_enhancement = allow_enhancement self.curved_line = curved_line + self.textline_light = textline_light self.full_layout = full_layout self.tables = tables self.input_binary = input_binary @@ -151,6 +208,7 @@ class Eynollah: dir_out=self.dir_out, image_filename=self.image_filename, curved_line=self.curved_line, + textline_light = self.textline_light, pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models @@ -165,7 +223,10 @@ class Eynollah: self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425.h5" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425.h5" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314.h5" - self.model_textline_dir = dir_models + "/eynollah-textline_20210425.h5" + if self.textline_light: + self.model_textline_dir = dir_models + "/model_17.h5" + else: + self.model_textline_dir = dir_models + "/eynollah-textline_20210425.h5" self.model_tables = dir_models + "/eynollah-tables_20210319.h5" if dir_in and light_version: @@ -603,7 +664,10 @@ class Eynollah: gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) - model = load_model(model_dir, compile=False) + try: + model = load_model(model_dir, compile=False) + except: + model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model, session @@ -1368,12 +1432,17 @@ class Eynollah: # plt.imshow(mask_only_con_region) # plt.show() - all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) - mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - - - all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv]) + + if self.textline_light: + all_text_region_raw = np.copy(textline_mask_tot_ea) + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) + cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + else: + all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) + mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv]) textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) @@ -1481,8 +1550,10 @@ class Eynollah: if not self.dir_in: session_textline.close() - - return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] + if self.textline_light: + return (prediction_textline[:, :, 0]==1)*1, (prediction_textline_longshot_true_size[:, :, 0]==1)*1 + else: + return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): self.logger.debug('enter do_work_of_slopes') @@ -2562,6 +2633,8 @@ class Eynollah: scaler_h_textline = 1 # 1.2#1.2 scaler_w_textline = 1 # 0.9#1 textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline) + if self.textline_light: + textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) if not self.dir_in: K.clear_session() if self.plotter: @@ -2870,7 +2943,6 @@ class Eynollah: self.ls_imgs = [1] for img_name in self.ls_imgs: - print(img_name,'img_name') t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) @@ -2887,6 +2959,7 @@ class Eynollah: num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) #self.logger.info("run graphics %.1fs ", time.time() - t1t) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -3043,8 +3116,12 @@ class Eynollah: if not self.curved_line: if self.light_version: - slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + if self.textline_light: + slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + else: + slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index d36d3ab..d5704f6 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -22,12 +22,13 @@ import numpy as np class EynollahXmlWriter(): - def __init__(self, *, dir_out, image_filename, curved_line, pcgts=None): + def __init__(self, *, dir_out, image_filename, curved_line,textline_light, pcgts=None): self.logger = getLogger('eynollah.writer') self.counter = EynollahIdCounter() self.dir_out = dir_out self.image_filename = image_filename self.curved_line = curved_line + self.textline_light = textline_light self.pcgts = pcgts self.scale_x = None # XXX set outside __init__ self.scale_y = None # XXX set outside __init__ @@ -60,7 +61,7 @@ class EynollahXmlWriter(): marginal_region.add_TextLine(textline) points_co = '' for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])): - if not self.curved_line: + if not (self.curved_line or self.textline_light): if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) @@ -70,7 +71,7 @@ class EynollahXmlWriter(): points_co += str(textline_x_coord) points_co += ',' points_co += str(textline_y_coord) - if self.curved_line and np.abs(slopes_marginals[marginal_idx]) <= 45: + if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45: if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x)) points_co += ',' @@ -80,7 +81,7 @@ class EynollahXmlWriter(): points_co += ',' points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y)) - elif self.curved_line and np.abs(slopes_marginals[marginal_idx]) > 45: + elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45: if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) points_co += ',' @@ -101,7 +102,7 @@ class EynollahXmlWriter(): region_bboxes = all_box_coord[region_idx] points_co = '' for idx_contour_textline, contour_textline in enumerate(all_found_texline_polygons[region_idx][j]): - if not self.curved_line: + if not (self.curved_line or self.textline_light): if len(contour_textline) == 2: textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) @@ -112,7 +113,7 @@ class EynollahXmlWriter(): points_co += ',' points_co += str(textline_y_coord) - if self.curved_line and np.abs(slopes[region_idx]) <= 45: + if (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) <= 45: if len(contour_textline) == 2: points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) points_co += ',' @@ -121,7 +122,7 @@ class EynollahXmlWriter(): points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) - elif self.curved_line and np.abs(slopes[region_idx]) > 45: + elif (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) > 45: if len(contour_textline)==2: points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x)) points_co += ',' diff --git a/requirements.txt b/requirements.txt index 8520780..54bb55e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # ocrd includes opencv, numpy, shapely, click -ocrd >= 2.23.3 -keras >= 2.3.1, < 2.4 -scikit-learn >= 0.23.2 -tensorflow-gpu >= 1.15, < 2 -imutils >= 0.5.3 +ocrd +keras == 2.6.0 +scikit-learn +tensorflow-gpu == 2.6.0 +imutils matplotlib -setuptools >= 50 +setuptools