From 2736ddb42d9c002f8c453b9e59aedf9fbe6fc9bc Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 10 Mar 2022 14:00:31 -0500 Subject: [PATCH 01/67] light version --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index c52560b..7673954 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,9 @@ Some heuristic methods are also employed to further improve the model prediction * After deskewing, a calculation of the pixel distribution on the X-axis allows the separation of textlines (foreground) and background pixels. * Finally, using the derived coordinates, bounding boxes are determined for each textline. +## Light version +layout detection is implemented in lower scale and with only one model. + ## Installation `pip install .` or From b8a532180a836f08f0364dd0f350e70e73b77f0b Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 10 Mar 2022 23:52:10 -0500 Subject: [PATCH 02/67] light version integration --- qurator/eynollah/eynollah.py | 531 +++++++++++++++++++++++++++++++++-- 1 file changed, 511 insertions(+), 20 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 81c0b0c..478372b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -143,6 +143,7 @@ class Eynollah: self.model_region_dir_fully = dir_models + "/model_3up_new_good_no_augmentation.h5" self.model_page_dir = dir_models + "/model_page_mixed_best.h5" self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5" + self.model_region_dir_p_ens_light = dir_models + "/model_11.h5" self.model_textline_dir = dir_models + "/model_textline_newspapers.h5" self.model_tables = dir_models + "/model_tables_ens_mixed_new_2.h5" @@ -378,10 +379,13 @@ class Eynollah: return img, img_new, is_image_enhanced - def resize_and_enhance_image_with_column_classifier(self): + def resize_and_enhance_image_with_column_classifier(self,light_version): self.logger.debug("enter resize_and_enhance_image_with_column_classifier") - dpi = self.dpi - self.logger.info("Detected %s DPI", dpi) + if light_version: + dpi = 300 + else: + dpi = self.dpi + self.logger.info("Detected %s DPI", dpi) if self.input_binary: img = self.imread() model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) @@ -637,6 +641,243 @@ class Eynollah: del model gc.collect() return prediction_true + def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_percent=0.1): + self.logger.debug("enter do_prediction") + + img_height_model = model.layers[len(model.layers) - 1].output_shape[1] + img_width_model = model.layers[len(model.layers) - 1].output_shape[2] + + if not patches: + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / float(255.0) + img = resize_image(img, img_height_model, img_width_model) + + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = resize_image(seg_color, img_h_page, img_w_page) + prediction_true = prediction_true.astype(np.uint8) + + + else: + if img.shape[0] < img_height_model: + img = resize_image(img, img_height_model, img.shape[1]) + + if img.shape[1] < img_width_model: + img = resize_image(img, img.shape[0], img_width_model) + + self.logger.info("Image dimensions: %sx%s", img_height_model, img_width_model) + margin = int(marginal_of_patch_percent * img_height_model) + width_mid = img_width_model - 2 * margin + height_mid = 
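
The tiling setup that begins here drives the whole patch loop below. A minimal sketch of the arithmetic (not the project's code), assuming the image has already been resized to be at least as large as the model input, as the checks above guarantee:

```python
import math

def tile_origins(img_len, model_len, margin):
    # Patches of the model's fixed input size are laid out with a stride of
    # (size - 2*margin); the trailing patch is clamped to end at the border.
    stride = model_len - 2 * margin          # effective, non-overlapping core
    n = math.ceil(img_len / stride)          # same as the int(nxf)+1 idiom above
    origins = []
    for i in range(n):
        start = i * stride
        if start + model_len > img_len:      # clamp the last patch to the border
            start = img_len - model_len
        origins.append(start)
    return origins

print(tile_origins(img_len=1000, model_len=448, margin=44))  # [0, 360, 552]
```
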
img_height_model - 2 * margin + img = img / float(255.0) + img = img.astype(np.float16) + img_h = img.shape[0] + img_w = img.shape[1] + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) + nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) + + for i in range(nxf): + for j in range(nyf): + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + else: + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + else: + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - img_width_model + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - img_height_model + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + seg = np.argmax(label_p_pred, axis=3)[0] + + + seg_not_base = label_p_pred[0,:,:,4] + ##seg2 = -label_p_pred[0,:,:,2] + + + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + + + seg_test = label_p_pred[0,:,:,1] + ##seg2 = -label_p_pred[0,:,:,2] + + + seg_test[seg_test>0.75] =1 + seg_test[seg_test<1] =0 + + + seg_line = label_p_pred[0,:,:,3] + ##seg2 = -label_p_pred[0,:,:,2] + + + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + + seg_background = label_p_pred[0,:,:,0] + ##seg2 = -label_p_pred[0,:,:,2] + + + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + ##seg = seg+seg2 + #seg = label_p_pred[0,:,:,2] + #seg[seg>0.4] =1 + #seg[seg<1] =0 + + ##plt.imshow(seg_test) + ##plt.show() + + ##plt.imshow(seg_background) + ##plt.show() + #seg[seg==1]=0 + #seg[seg_test==1]=1 + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + + if i == 0 and j == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + elif i == nxf - 1 and j == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] + mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg + prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color + elif i == 0 and j == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] + mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg + prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color + elif i == nxf - 1 and j == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] + mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + 
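
A condensed sketch of the per-channel thresholding spread over the block above. The channel meanings (0 background, 1 text, 3 separator line, 4 "not base") are an inference from how the results are used, not stated in the patch, and the text threshold is computed there but its override stays commented out:

```python
import numpy as np

def compose_labels(label_p_pred):
    # label_p_pred: (1, H, W, C) softmax output of the light region model
    seg = np.argmax(label_p_pred, axis=3)[0]        # start from the argmax label
    not_base   = label_p_pred[0, :, :, 4] > 0.03    # very permissive threshold
    background = label_p_pred[0, :, :, 0] > 0.25
    line       = label_p_pred[0, :, :, 3] > 0.10
    seg[not_base] = 4                               # override order matters:
    seg[background] = 0                             # background wins over class 4,
    seg[line & (seg == 0)] = 3                      # lines only claim background
    return seg
```
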
prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + elif i == 0 and j != 0 and j != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + elif i == nxf - 1 and j != 0 and j != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] + mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + elif i != 0 and i != nxf - 1 and j == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] + mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + elif i != 0 and i != nxf - 1 and j == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] + mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg + prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] + mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + + prediction_true = prediction_true.astype(np.uint8) + del model + gc.collect() + return prediction_true + + def early_page_for_num_of_column_classification(self,img_bin): + self.logger.debug("enter early_page_for_num_of_column_classification") + if self.input_binary: + img =np.copy(img_bin) + img = img.astype(np.uint8) + else: + img = self.imread() + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + img = cv2.GaussianBlur(img, (5, 5), 0) + + img_page_prediction = self.do_prediction(False, img, model_page) + + imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + thresh = cv2.dilate(thresh, KERNEL, iterations=3) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + if len(contours)>0: + cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt = contours[np.argmax(cnt_size)] + x, y, w, h = cv2.boundingRect(cnt) + box = [x, y, w, h] + else: + box = [0, 0, img.shape[1], img.shape[0]] + croped_page, page_coord = crop_image_inside_box(box, img) + session_page.close() + del model_page + del session_page + gc.collect() + K.clear_session() + self.logger.debug("exit early_page_for_num_of_column_classification") + return croped_page, page_coord + + def extract_page(self): + self.logger.debug("enter 
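
Both page-detection helpers in this patch share the same heuristic, sketched here in isolation (KERNEL is assumed to be the small ones-kernel used elsewhere in eynollah):

```python
import cv2
import numpy as np

KERNEL = np.ones((5, 5), np.uint8)

def page_box(img_page_prediction, img):
    # Binarize the page-segmentation output, dilate it, and take the bounding
    # box of the largest contour; fall back to the full image if none is found.
    imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(imgray, 0, 255, 0)
    thresh = cv2.dilate(thresh, KERNEL, iterations=3)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        cnt = max(contours, key=cv2.contourArea)   # same as argmax over areas
        x, y, w, h = cv2.boundingRect(cnt)
        return [x, y, w, h]
    return [0, 0, img.shape[1], img.shape[0]]
```
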
extract_page") + cont_page = [] + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + img = cv2.GaussianBlur(self.image, (5, 5), 0) + img_page_prediction = self.do_prediction(False, img, model_page) + imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + thresh = cv2.dilate(thresh, KERNEL, iterations=3) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + if len(contours)>0: + cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt = contours[np.argmax(cnt_size)] + x, y, w, h = cv2.boundingRect(cnt) + if x <= 30: + w += x + x = 0 + if (self.image.shape[1] - (x + w)) <= 30: + w = w + (self.image.shape[1] - (x + w)) + if y <= 30: + h = h + y + y = 0 + if (self.image.shape[0] - (y + h)) <= 30: + h = h + (self.image.shape[0] - (y + h)) + + box = [x, y, w, h] + else: + box = [0, 0, img.shape[1], img.shape[0]] + croped_page, page_coord = crop_image_inside_box(box, self.image) + cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) + session_page.close() + del model_page + del session_page + gc.collect() + K.clear_session() + self.logger.debug("exit extract_page") + return croped_page, page_coord, cont_page def early_page_for_num_of_column_classification(self,img_bin): self.logger.debug("enter early_page_for_num_of_column_classification") @@ -808,6 +1049,54 @@ class Eynollah: self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 + + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + self.logger.debug("enter get_slopes_and_deskew_new") + num_cores = cpu_count() + queue_of_all_params = Queue() + + processes = [] + nh = np.linspace(0, len(boxes), num_cores + 1) + indexes_by_text_con = np.array(range(len(contours_par))) + for i in range(num_cores): + boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] + contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] + contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] + + processes.append(Process(target=self.do_work_of_slopes_new_light, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew))) + for i in range(num_cores): + processes[i].start() + + slopes = [] + all_found_texline_polygons = [] + all_found_text_regions = [] + all_found_text_regions_par = [] + boxes = [] + all_box_coord = [] + all_index_text_con = [] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + slopes_for_sub_process = list_all_par[0] + polys_for_sub_process = list_all_par[1] + boxes_for_sub_process = list_all_par[2] + contours_for_subprocess = list_all_par[3] + contours_par_for_subprocess = list_all_par[4] + boxes_coord_for_subprocess = list_all_par[5] + indexes_for_subprocess = list_all_par[6] + for j in range(len(slopes_for_sub_process)): + slopes.append(slopes_for_sub_process[j]) + all_found_texline_polygons.append(polys_for_sub_process[j]) + boxes.append(boxes_for_sub_process[j]) + all_found_text_regions.append(contours_for_subprocess[j]) + all_found_text_regions_par.append(contours_par_for_subprocess[j]) + all_box_coord.append(boxes_coord_for_subprocess[j]) + 
all_index_text_con.append(indexes_for_subprocess[j]) + for i in range(num_cores): + processes[i].join() + self.logger.debug('slopes %s', slopes) + self.logger.debug("exit get_slopes_and_deskew_new") + return slopes, all_found_texline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") @@ -1017,7 +1306,44 @@ class Eynollah: all_box_coord_per_process.append(crop_coor) queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess]) + def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): + self.logger.debug('enter do_work_of_slopes_new') + slopes_per_each_subprocess = [] + bounding_box_of_textregion_per_each_subprocess = [] + textlines_rectangles_per_each_subprocess = [] + contours_textregion_per_each_subprocess = [] + contours_textregion_par_per_each_subprocess = [] + all_box_coord_per_process = [] + index_by_text_region_contours = [] + for mv in range(len(boxes_text)): + _, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated) + mask_textline = np.zeros((textline_mask_tot_ea.shape)) + mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) + all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] + all_text_region_raw=all_text_region_raw.astype(np.uint8) + + slopes_per_each_subprocess.append([slope_deskew][0]) + mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) + mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) + + # plt.imshow(mask_only_con_region) + # plt.show() + all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) + mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] + + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv]) + + textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) + index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) + bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) + + contours_textregion_per_each_subprocess.append(contours_per_process[mv]) + contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) + all_box_coord_per_process.append(crop_coor) + queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) + def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, 
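
The core of do_work_of_slopes_new_light, sketched standalone: the textline mask is restricted to one region polygon and cropped to its box, and, unlike the original path, `[slope_deskew][0]` above simply reuses the global page angle for every region instead of re-estimating a per-region slope:

```python
import cv2
import numpy as np

def region_textlines(textline_mask, contour, box):
    # contour: int32 polygon as returned by cv2.findContours; box: (x, y, w, h)
    x, y, w, h = box
    region_mask = np.zeros(textline_mask.shape)
    region_mask = cv2.fillPoly(region_mask, pts=[contour], color=(1, 1, 1))
    crop = np.copy(textline_mask[y:y + h, x:x + w])
    crop[region_mask[y:y + h, x:x + w] == 0] = 0   # keep this region's lines only
    return crop.astype(np.uint8)
```
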
contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): self.logger.debug('enter do_work_of_slopes_new') slopes_per_each_subprocess = [] @@ -1144,6 +1470,110 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) + def get_regions_from_xy_2models_light(self,img,is_image_enhanced, num_col_classifier): + self.logger.debug("enter get_regions_from_xy_2models") + erosion_hurts = False + img_org = np.copy(img) + img_height_h = img_org.shape[0] + img_width_h = img_org.shape[1] + + #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + + + + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 1500 + img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) + + elif num_col_classifier == 3: + img_w_new = 2000 + img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) + + elif num_col_classifier == 4: + img_w_new = 2500 + img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) + elif num_col_classifier == 5: + img_w_new = 3000 + img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) + else: + img_w_new = 4000 + img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) + gc.collect() + ##img_resized = resize_image(img_bin,img_height_h, img_width_h ) + img_resized = resize_image(img,img_h_new, img_w_new ) + + tbin = time.time() + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + print("time bin session", time.time()-tbin) + prediction_bin = self.do_prediction(True, img_resized, model_bin) + print("time bin all ", time.time()-tbin) + prediction_bin=prediction_bin[:,:,0] + prediction_bin = (prediction_bin[:,:]==0)*1 + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + session_bin.close() + del model_bin + del session_bin + gc.collect() + + prediction_bin = prediction_bin.astype(np.uint16) + #img= np.copy(prediction_bin) + img_bin = np.copy(prediction_bin) + + + + + tline = time.time() + textline_mask_tot_ea = self.run_textline(img_bin) + print("time line all ", time.time()-tline) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + + + #plt.imshow(img_bin) + #plt.show() + + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + + #plt.imshow(prediction_regions_org[:,:,0]) + #plt.show() + + prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) + + prediction_regions_org=prediction_regions_org[:,:,0] + + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + + mask_images_only=(prediction_regions_org[:,:] ==2)*1 + + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) + + + text_regions_p_true = np.zeros(prediction_regions_org.shape) + + text_regions_p_true = 
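
A compact equivalent of the if/elif ladder above that maps the classified column count to the light pipeline's working width (same constants as the patch):

```python
def light_target_size(num_cols, img_h, img_w):
    widths = {1: 1000, 2: 1500, 3: 2000, 4: 2500, 5: 3000}
    img_w_new = widths.get(num_cols, 4000)          # 6+ columns fall back to 4000
    img_h_new = int(img_h / float(img_w) * img_w_new)  # preserve the aspect ratio
    return img_h_new, img_w_new
```
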
cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) + + text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) + + #erosion_hurts = True + K.clear_session() + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") @@ -1939,7 +2369,54 @@ class Eynollah: return prediction_table_erode.astype(np.int16) + def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts): + img_g = self.imread(grayscale=True, uint8=True) + + img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) + img_g3 = img_g3.astype(np.uint8) + img_g3[:, :, 0] = img_g[:, :] + img_g3[:, :, 1] = img_g[:, :] + img_g3[:, :, 2] = img_g[:, :] + image_page, page_coord, cont_page = self.extract_page() + + if self.tables: + table_prediction = self.get_tables_from_model(image_page, num_col_classifier) + else: + table_prediction = (np.zeros((image_page.shape[0], image_page.shape[1]))).astype(np.int16) + + if self.plotter: + self.plotter.save_page_image(image_page) + + text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + mask_images = (text_regions_p_1[:, :] == 2) * 1 + mask_images = mask_images.astype(np.uint8) + mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) + mask_lines = (text_regions_p_1[:, :] == 3) * 1 + mask_lines = mask_lines.astype(np.uint8) + img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 + img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) + + + if erosion_hurts: + img_only_regions = np.copy(img_only_regions_with_sep[:,:]) + else: + img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) + + ##print(img_only_regions.shape,'img_only_regions') + ##plt.imshow(img_only_regions[:,:]) + ##plt.show() + num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) + try: + num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) + num_col = num_col + 1 + if not num_column_is_classified: + num_col_classifier = num_col + 1 + except Exception as why: + self.logger.error(why) + num_col = None + return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): img_g = self.imread(grayscale=True, uint8=True) @@ -1985,9 +2462,9 @@ class Eynollah: num_col = None return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction - def run_enhancement(self): + def run_enhancement(self,light_version): self.logger.info("Resizing and enhancing image...") - is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier() + is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = 
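
How the final label map is assembled above, in isolation: every class is painted into one integer image, so the paint order (lines, then images, then text) decides which class wins where the predictions overlap:

```python
import cv2
import numpy as np

def compose_region_map(shape, line_polys, image_mask, text_polys):
    regions = np.zeros(shape)                                  # 0 = background
    regions = cv2.fillPoly(regions, pts=line_polys, color=(3, 3, 3))
    regions[image_mask == 1] = 2
    regions = cv2.fillPoly(regions, pts=text_polys, color=(1, 1, 1))  # text wins last
    return regions
```
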
self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') K.clear_session() scale = 1 @@ -2301,22 +2778,31 @@ class Eynollah: """ Get image and scales, then extract the page of scanned image """ + light_version = True self.logger.debug("enter run") t0 = time.time() - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement() + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) t1 = time.time() - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + if light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_from_xy_2models_light(img_res, is_image_enhanced, num_col_classifier) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) + else: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - self.logger.info('cont_page %s', cont_page) + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + self.logger.info('cont_page %s', cont_page) if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") @@ -2325,12 +2811,13 @@ class Eynollah: return pcgts t1 = time.time() - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + if not light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) - t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) t1 = time.time() #plt.imshow(table_prediction) #plt.show() @@ -2455,8 +2942,12 @@ class Eynollah: boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) if not self.curved_line: - slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, 
contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + if light_version: + slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + else: + slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 From cf5ef8f5ae8bdf194dab6c4b9ba06824f25a41d5 Mon Sep 17 00:00:00 2001 From: vahid Date: Mon, 14 Mar 2022 11:37:32 -0400 Subject: [PATCH 03/67] light version as option --- qurator/eynollah/cli.py | 10 +++++++++- qurator/eynollah/eynollah.py | 11 ++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index f343918..6aabbae 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -92,11 +92,17 @@ from qurator.eynollah.eynollah import Eynollah help="if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection", ) @click.option( - "--headers-off/--headers-on", + "--headers_off/--headers-on", "-ho/-noho", is_flag=True, help="if this parameter set to true, this tool would ignore headers role in reading order", ) +@click.option( + "--light_version/--original", + "-light/-org", + is_flag=True, + help="if this parameter set to true, this tool would use lighter version", +) @click.option( "--log-level", "-l", @@ -119,6 +125,7 @@ def main( input_binary, allow_scaling, headers_off, + light_version, log_level ): if log_level: @@ -146,6 +153,7 @@ def main( input_binary=input_binary, allow_scaling=allow_scaling, headers_off=headers_off, + light_version=light_version, ) pcgts = eynollah.run() eynollah.writer.write_pagexml(pcgts) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 478372b..62ae6de 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -100,6 +100,7 @@ class Eynollah: input_binary=False, allow_scaling=False, headers_off=False, + light_version=False, override_dpi=None, logger=None, pcgts=None, @@ -119,6 +120,7 @@ class Eynollah: self.input_binary = input_binary self.allow_scaling = allow_scaling self.headers_off = headers_off + self.light_version = light_version self.plotter = None if not enable_plotting else EynollahPlotter( dir_out=self.dir_out, dir_of_all=dir_of_all, @@ -2778,16 
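
The new options follow click's paired-flag pattern; a self-contained sketch (the command name and help text here are illustrative, not the project's):

```python
import click

@click.command()
@click.option("--light_version/--original", "-light/-org", is_flag=True,
              help="use the lighter single-model, lower-scale layout pipeline")
def main(light_version):
    # click derives one boolean parameter from the paired on/off flag names
    click.echo(f"light_version={light_version}")

if __name__ == "__main__":
    main()
```
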
+2780,15 @@ class Eynollah: """ Get image and scales, then extract the page of scanned image """ - light_version = True self.logger.debug("enter run") t0 = time.time() - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(light_version) + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) t1 = time.time() - if light_version: + if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_from_xy_2models_light(img_res, is_image_enhanced, num_col_classifier) slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) @@ -2811,7 +2812,7 @@ class Eynollah: return pcgts t1 = time.time() - if not light_version: + if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) self.logger.info("textline detection took %.1fs", time.time() - t1) @@ -2942,7 +2943,7 @@ class Eynollah: boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) if not self.curved_line: - if light_version: + if self.light_version: slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: From c606391c312eceab9aa3ebff071bdf12a30b45cc Mon Sep 17 00:00:00 2001 From: vahid Date: Tue, 29 Mar 2022 06:55:19 -0400 Subject: [PATCH 04/67] flow from directory --- qurator/eynollah/cli.py | 14 +- qurator/eynollah/eynollah.py | 975 ++++++++++++++++------------- qurator/eynollah/utils/__init__.py | 80 ++- qurator/eynollah/utils/contour.py | 133 +++- 4 files changed, 766 insertions(+), 436 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 6aabbae..ca938c4 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -10,7 +10,6 @@ from qurator.eynollah.eynollah import Eynollah "-i", help="image filename", type=click.Path(exists=True, dir_okay=False), - required=True, ) @click.option( "--out", @@ -19,6 +18,12 @@ from qurator.eynollah.eynollah import Eynollah type=click.Path(exists=True, file_okay=False), required=True, ) +@click.option( + "--dir_in", + "-di", + help="directory of images", + type=click.Path(exists=True, file_okay=False), +) @click.option( "--model", "-m", @@ -112,6 +117,7 @@ from qurator.eynollah.eynollah import Eynollah def main( image, out, + dir_in, model, save_images, save_layout, @@ -140,6 +146,7 @@ def main( eynollah = Eynollah( image_filename=image, dir_out=out, + dir_in=dir_in, dir_models=model, dir_of_cropped_images=save_images, dir_of_layout=save_layout, @@ -155,8 +162,9 @@ def main( headers_off=headers_off, light_version=light_version, ) - pcgts = eynollah.run() - eynollah.writer.write_pagexml(pcgts) + eynollah.run() + #pcgts = eynollah.run() + ##eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": main() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 62ae6de..b3fca7b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -29,6 +29,7 @@ 
warnings.filterwarnings("ignore") from scipy.signal import find_peaks import matplotlib.pyplot as plt from scipy.ndimage import gaussian_filter1d +from keras.backend import set_session from .utils.contour import ( filter_contours_area_of_image, @@ -38,6 +39,7 @@ from .utils.contour import ( find_features_of_contours, get_text_region_boxes_by_given_contours, get_textregion_contours_in_org_image, + get_textregion_contours_in_org_image_light, return_contours_of_image, return_contours_of_interested_region, return_contours_of_interested_region_by_min_size, @@ -65,6 +67,7 @@ from .utils import ( put_drop_out_from_only_drop_model, putt_bb_of_drop_capitals_of_model_in_patches_in_layout, check_any_text_region_in_model_one_is_main_or_header, + check_any_text_region_in_model_one_is_main_or_header_light, small_textlines_to_parent_adherence2, order_of_regions, find_number_of_columns_in_document, @@ -84,10 +87,11 @@ class Eynollah: def __init__( self, dir_models, - image_filename, + image_filename=None, image_pil=None, image_filename_stem=None, dir_out=None, + dir_in=None, dir_of_cropped_images=None, dir_of_layout=None, dir_of_deskewed=None, @@ -105,14 +109,16 @@ class Eynollah: logger=None, pcgts=None, ): - if image_pil: - self._imgs = self._cache_images(image_pil=image_pil) - else: - self._imgs = self._cache_images(image_filename=image_filename) - if override_dpi: - self.dpi = override_dpi - self.image_filename = image_filename + if not dir_in: + if image_pil: + self._imgs = self._cache_images(image_pil=image_pil) + else: + self._imgs = self._cache_images(image_filename=image_filename) + if override_dpi: + self.dpi = override_dpi + self.image_filename = image_filename self.dir_out = dir_out + self.dir_in = dir_in self.allow_enhancement = allow_enhancement self.curved_line = curved_line self.full_layout = full_layout @@ -121,6 +127,7 @@ class Eynollah: self.allow_scaling = allow_scaling self.headers_off = headers_off self.light_version = light_version + self.pcgts = pcgts self.plotter = None if not enable_plotting else EynollahPlotter( dir_out=self.dir_out, dir_of_all=dir_of_all, @@ -128,11 +135,12 @@ class Eynollah: dir_of_cropped_images=dir_of_cropped_images, dir_of_layout=dir_of_layout, image_filename_stem=Path(Path(image_filename).name).stem) - self.writer = EynollahXmlWriter( - dir_out=self.dir_out, - image_filename=self.image_filename, - curved_line=self.curved_line, - pcgts=pcgts) + if not dir_in: + self.writer = EynollahXmlWriter( + dir_out=self.dir_out, + image_filename=self.image_filename, + curved_line=self.curved_line, + pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models @@ -149,6 +157,41 @@ class Eynollah: self.model_textline_dir = dir_models + "/model_textline_newspapers.h5" self.model_tables = dir_models + "/model_tables_ens_mixed_new_2.h5" + if dir_in and light_version: + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True + session = tf.compat.v1.Session(config=config) + set_session(session) + + self.model_page = self.our_load_model(self.model_page_dir) + self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) + self.model_bin = self.our_load_model(self.model_dir_of_binarization) + self.model_textline = self.our_load_model(self.model_textline_dir) + self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) + self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) + self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + + self.ls_imgs 
= os.listdir(self.dir_in) + + if dir_in and not light_version: + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True + session = tf.compat.v1.Session(config=config) + set_session(session) + + self.model_page = self.our_load_model(self.model_page_dir) + self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) + self.model_bin = self.our_load_model(self.model_dir_of_binarization) + self.model_textline = self.our_load_model(self.model_textline_dir) + self.model_region = self.our_load_model(self.model_region_dir_p_ens) + self.model_region_p2 = self.our_load_model(self.model_region_dir_p2) + self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) + self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) + + self.ls_imgs = os.listdir(self.dir_in) + + def _cache_images(self, image_filename=None, image_pil=None): ret = {} if image_filename: @@ -161,7 +204,15 @@ class Eynollah: for prefix in ('', '_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret - + def reset_file_name_dir(self, image_filename): + self._imgs = self._cache_images(image_filename=image_filename) + self.image_filename = image_filename + + self.writer = EynollahXmlWriter( + dir_out=self.dir_out, + image_filename=self.image_filename, + curved_line=self.curved_line, + pcgts=self.pcgts) def imread(self, grayscale=False, uint8=True): key = 'img' if grayscale: @@ -335,7 +386,8 @@ class Eynollah: img = self.imread() _, page_coord = self.early_page_for_num_of_column_classification(img) - model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) + if not self.dir_in: + model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) if self.input_binary: img_in = np.copy(img) img_in = img_in / 255.0 @@ -357,18 +409,19 @@ class Eynollah: img_in[0, :, :, 0] = img_1ch[:, :] img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - - label_p_pred = model_num_classifier.predict(img_in) + if not self.dir_in: + label_p_pred = model_num_classifier.predict(img_in) + else: + label_p_pred = self.model_classifier.predict(img_in) num_col = np.argmax(label_p_pred[0]) + 1 self.logger.info("Found %s columns (%s)", num_col, label_p_pred) - - session_col_classifier.close() - - del model_num_classifier - del session_col_classifier - - K.clear_session() + if not self.dir_in: + session_col_classifier.close() + + del model_num_classifier + del session_col_classifier + K.clear_session() gc.collect() @@ -383,25 +436,27 @@ class Eynollah: def resize_and_enhance_image_with_column_classifier(self,light_version): self.logger.debug("enter resize_and_enhance_image_with_column_classifier") - if light_version: - dpi = 300 - else: - dpi = self.dpi - self.logger.info("Detected %s DPI", dpi) + dpi = self.dpi + self.logger.info("Detected %s DPI", dpi) if self.input_binary: img = self.imread() - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img, model_bin) + if self.dir_in: + prediction_bin = self.do_prediction(True, img, self.model_bin) + else: + + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img, model_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 
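
The batch mode added in this patch loads every model once in __init__ and then re-targets reader and writer per file. A sketch of that flow; our_load_model is not shown in this hunk, so the keras wrapper below is an assumption, and run_per_image is a hypothetical stand-in for the single-image pipeline:

```python
import os
from tensorflow.keras.models import load_model

def our_load_model(model_file):
    # assumption: eynollah models load without custom objects; compile=False
    # because they are only used for inference
    return load_model(model_file, compile=False)

# Directory mode then reduces to a loop over the cached listing:
#
#     for img_name in self.ls_imgs:
#         self.reset_file_name_dir(os.path.join(self.dir_in, img_name))
#         pcgts = self.run_per_image()          # hypothetical per-image entry
#         self.writer.write_pagexml(pcgts)
```
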
prediction_bin = prediction_bin*255 prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - session_bin.close() - del model_bin - del session_bin + + if not self.dir_in: + session_bin.close() + del model_bin + del session_bin gc.collect() prediction_bin = prediction_bin.astype(np.uint8) @@ -412,7 +467,8 @@ class Eynollah: img_bin = None _, page_coord = self.early_page_for_num_of_column_classification(img_bin) - model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) + if not self.dir_in: + model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) if self.input_binary: img_in = np.copy(img) @@ -433,23 +489,29 @@ class Eynollah: img_in[0, :, :, 2] = img_1ch[:, :] - - label_p_pred = model_num_classifier.predict(img_in) + if self.dir_in: + label_p_pred = self.model_classifier.predict(img_in) + else: + label_p_pred = model_num_classifier.predict(img_in) num_col = np.argmax(label_p_pred[0]) + 1 self.logger.info("Found %s columns (%s)", num_col, label_p_pred) - session_col_classifier.close() - K.clear_session() + if not self.dir_in: + session_col_classifier.close() + K.clear_session() if dpi < DPI_THRESHOLD: img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) - image_res = self.predict_enhancement(img_new) + if light_version: + image_res = np.copy(img_new) + else: + image_res = self.predict_enhancement(img_new) is_image_enhanced = True else: num_column_is_classified = True image_res = np.copy(img) is_image_enhanced = False - - session_col_classifier.close() + if not self.dir_in: + session_col_classifier.close() self.logger.debug("exit resize_and_enhance_image_with_column_classifier") @@ -595,48 +657,48 @@ class Eynollah: if i == 0 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg + #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color elif i == 0 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg + #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : 
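
The binarization post-processing repeated throughout this patch, as one helper: the first channel of the model's label output is inverted to a 0/255 image and replicated to three channels so the result can be fed to models expecting RGB input:

```python
import numpy as np

def bin_prediction_to_rgb(pred):
    # pred: H x W x C label image from the binarization model
    bin1 = (pred[:, :, 0] == 0).astype(np.uint8) * 255   # invert channel 0
    return np.repeat(bin1[:, :, np.newaxis], 3, axis=2)  # replicate to 3 channels
```
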
seg_color.shape[1] - 0, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color elif i == 0 and j != 0 and j != nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color elif i == nxf - 1 and j != 0 and j != nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color elif i != 0 and i != nxf - 1 and j == 0: seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color elif i != 0 and i != nxf - 1 and j == nyf - 1: seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg + #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color else: seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color prediction_true = prediction_true.astype(np.uint8) @@ -817,10 +879,13 @@ class Eynollah: img = img.astype(np.uint8) else: img = self.imread() - model_page, session_page = 
self.start_new_session_and_model(self.model_page_dir) + if not self.dir_in: + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(img, (5, 5), 0) - - img_page_prediction = self.do_prediction(False, img, model_page) + if self.dir_in: + img_page_prediction = self.do_prediction(False, img, self.model_page) + else: + img_page_prediction = self.do_prediction(False, img, model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) @@ -834,20 +899,25 @@ class Eynollah: else: box = [0, 0, img.shape[1], img.shape[0]] croped_page, page_coord = crop_image_inside_box(box, img) - session_page.close() - del model_page - del session_page + if not self.dir_in: + session_page.close() + del model_page + del session_page + K.clear_session() gc.collect() - K.clear_session() self.logger.debug("exit early_page_for_num_of_column_classification") return croped_page, page_coord def extract_page(self): self.logger.debug("enter extract_page") cont_page = [] - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + if not self.dir_in: + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(self.image, (5, 5), 0) - img_page_prediction = self.do_prediction(False, img, model_page) + if not self.dir_in: + img_page_prediction = self.do_prediction(False, img, model_page) + else: + img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) @@ -873,11 +943,12 @@ class Eynollah: box = [0, 0, img.shape[1], img.shape[0]] croped_page, page_coord = crop_image_inside_box(box, self.image) cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - session_page.close() - del model_page - del session_page + if not self.dir_in: + session_page.close() + del model_page + del session_page + K.clear_session() gc.collect() - K.clear_session() self.logger.debug("exit extract_page") return croped_page, page_coord, cont_page @@ -888,10 +959,14 @@ class Eynollah: img = img.astype(np.uint8) else: img = self.imread() - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + if not self.dir_in: + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(img, (5, 5), 0) - - img_page_prediction = self.do_prediction(False, img, model_page) + + if self.dir_in: + img_page_prediction = self.do_prediction(False, img, self.model_page) + else: + img_page_prediction = self.do_prediction(False, img, model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) @@ -905,20 +980,28 @@ class Eynollah: else: box = [0, 0, img.shape[1], img.shape[0]] croped_page, page_coord = crop_image_inside_box(box, img) - session_page.close() - del model_page - del session_page + + if not self.dir_in: + session_page.close() + del model_page + del session_page + K.clear_session() + gc.collect() - K.clear_session() + self.logger.debug("exit early_page_for_num_of_column_classification") return croped_page, page_coord def extract_page(self): self.logger.debug("enter extract_page") cont_page = [] - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + if not self.dir_in: + 
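
From here on, nearly every model call in the file gains the same branch; sketched once (a method-style sketch, not the actual eynollah code): in directory mode the model preloaded in __init__ is reused, otherwise a session is created and torn down around the single prediction.

```python
def predict_page(self, img):
    # sketch of the recurring dir_in branch added throughout this patch
    if self.dir_in:
        return self.do_prediction(False, img, self.model_page)
    model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
    try:
        return self.do_prediction(False, img, model_page)
    finally:
        session_page.close()               # per-call teardown in single-file mode
```
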
model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(self.image, (5, 5), 0) - img_page_prediction = self.do_prediction(False, img, model_page) + if not self.dir_in: + img_page_prediction = self.do_prediction(False, img, model_page) + else: + img_page_prediction = self.do_prediction(False, img, self.model_page) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) @@ -944,11 +1027,12 @@ class Eynollah: box = [0, 0, img.shape[1], img.shape[0]] croped_page, page_coord = crop_image_inside_box(box, self.image) cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - session_page.close() - del model_page - del session_page + if not self.dir_in: + session_page.close() + del model_page + del session_page + K.clear_session() gc.collect() - K.clear_session() self.logger.debug("exit extract_page") return croped_page, page_coord, cont_page @@ -956,8 +1040,10 @@ class Eynollah: self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] img_width_h = img.shape[1] - - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) + else: + model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: img = otsu_copy_binary(img) @@ -1043,10 +1129,11 @@ class Eynollah: marginal_of_patch_percent = 0.1 prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) - - session_region.close() - del model_region - del session_region + + if not self.dir_in: + session_region.close() + del model_region + del session_region gc.collect() self.logger.debug("exit extract_text_regions") @@ -1422,19 +1509,26 @@ class Eynollah: def textline_contours(self, img, patches, scaler_h, scaler_w): self.logger.debug('enter textline_contours') - - model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) + if not self.dir_in: + model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - prediction_textline = self.do_prediction(patches, img, model_textline) + if not self.dir_in: + prediction_textline = self.do_prediction(patches, img, model_textline) + else: + prediction_textline = self.do_prediction(patches, img, self.model_textline) prediction_textline = resize_image(prediction_textline, img_h, img_w) - prediction_textline_longshot = self.do_prediction(False, img, model_textline) + if not self.dir_in: + prediction_textline_longshot = self.do_prediction(False, img, model_textline) + else: + prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) - - session_textline.close() + + if not self.dir_in: + 
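
extract_page's border snapping, isolated: a detected page box that comes within 30 px of an image edge is extended to touch that edge, so near-border content is not clipped.

```python
def snap_box_to_border(x, y, w, h, img_w, img_h, tol=30):
    # same arithmetic as the four ifs in extract_page above
    if x <= tol:
        w, x = w + x, 0
    if img_w - (x + w) <= tol:
        w = img_w - x
    if y <= tol:
        h, y = h + y, 0
    if img_h - (y + h) <= tol:
        h = img_h - y
    return x, y, w, h
```
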
session_textline.close() return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] @@ -1508,20 +1602,20 @@ class Eynollah: ##img_resized = resize_image(img_bin,img_height_h, img_width_h ) img_resized = resize_image(img,img_h_new, img_w_new ) - tbin = time.time() - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - print("time bin session", time.time()-tbin) - prediction_bin = self.do_prediction(True, img_resized, model_bin) - print("time bin all ", time.time()-tbin) + if not self.dir_in: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_resized, model_bin) + else: + prediction_bin = self.do_prediction(True, img_resized, self.model_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - session_bin.close() - del model_bin - del session_bin + if not self.dir_in: + session_bin.close() + del model_bin + del session_bin gc.collect() prediction_bin = prediction_bin.astype(np.uint16) @@ -1530,18 +1624,14 @@ class Eynollah: - - tline = time.time() textline_mask_tot_ea = self.run_textline(img_bin) - print("time line all ", time.time()-tline) - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - - - #plt.imshow(img_bin) - #plt.show() - - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) - + + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + else: + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -1564,7 +1654,6 @@ class Eynollah: polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - text_regions_p_true = np.zeros(prediction_regions_org.shape) text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) @@ -1574,7 +1663,8 @@ class Eynollah: text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) #erosion_hurts = True - K.clear_session() + if not self.dir_in: + K.clear_session() return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): @@ -1583,15 +1673,18 @@ class Eynollah: img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1.3 ratio_x=1 img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - - prediction_regions_org_y = self.do_prediction(True, img, model_region) + if not self.dir_in: + prediction_regions_org_y = self.do_prediction(True, img, model_region) + else: + prediction_regions_org_y = self.do_prediction(True, img, self.model_region) prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h ) #plt.imshow(prediction_regions_org_y[:,:,0]) @@ -1609,8 +1702,11 @@ class Eynollah: _, _ = find_num_col(img_only_regions, 
num_col_classifier, self.tables, multiplier=6.0) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) - - prediction_regions_org = self.do_prediction(True, img, model_region) + + if self.dir_in: + prediction_regions_org = self.do_prediction(True, img, self.model_region) + else: + prediction_regions_org = self.do_prediction(True, img, model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) ##plt.imshow(prediction_regions_org[:,:,0]) @@ -1618,20 +1714,26 @@ class Eynollah: prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 - session_region.close() - del model_region - del session_region + if not self.dir_in: + session_region.close() + del model_region + del session_region gc.collect() - - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) + + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) - prediction_regions_org2 = self.do_prediction(True, img, model_region, 0.2) + + if self.dir_in: + prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, 0.2) + else: + prediction_regions_org2 = self.do_prediction(True, img, model_region, 0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) - - session_region.close() - del model_region - del session_region + if not self.dir_in: + session_region.close() + del model_region + del session_region gc.collect() mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) @@ -1663,8 +1765,11 @@ class Eynollah: if self.input_binary: prediction_bin = np.copy(img_org) else: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin) + if not self.dir_in: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_org, model_bin) + else: + prediction_bin = self.do_prediction(True, img_org, self.model_bin) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -1672,29 +1777,34 @@ class Eynollah: prediction_bin = prediction_bin*255 prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - session_bin.close() - del model_bin - del session_bin + + if not self.dir_in: + session_bin.close() + del model_bin + del session_bin gc.collect() - - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1 ratio_x=1 img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - - prediction_regions_org = self.do_prediction(True, img, model_region) + + if not self.dir_in: + prediction_regions_org = self.do_prediction(True, img, model_region) + else: + prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] mask_lines_only=(prediction_regions_org[:,:]==3)*1 - session_region.close() - del model_region - del session_region + if not self.dir_in: + session_region.close() + 
del model_region + del session_region gc.collect() @@ -1716,21 +1826,25 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) - - K.clear_session() + if not self.dir_in: + K.clear_session() return text_regions_p_true, erosion_hurts, polygons_lines_xml except: if self.input_binary: prediction_bin = np.copy(img_org) else: - session_region.close() - del model_region - del session_region + if not self.dir_in: + session_region.close() + del model_region + del session_region gc.collect() - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin) + if not self.dir_in: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_org, model_bin) + else: + prediction_bin = self.do_prediction(True, img_org, self.model_bin) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -1741,29 +1855,32 @@ class Eynollah: prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - session_bin.close() - del model_bin - del session_bin + if not self.dir_in: + session_bin.close() + del model_bin + del session_bin gc.collect() - - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) ratio_y=1 ratio_x=1 img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - - prediction_regions_org = self.do_prediction(True, img, model_region) + if not self.dir_in: + prediction_regions_org = self.do_prediction(True, img, model_region) + else: + prediction_regions_org = self.do_prediction(True, img, self.model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] #mask_lines_only=(prediction_regions_org[:,:]==3)*1 - session_region.close() - del model_region - del session_region + if not self.dir_in: + session_region.close() + del model_region + del session_region gc.collect() #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) @@ -1807,7 +1924,8 @@ class Eynollah: text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) erosion_hurts = True - K.clear_session() + if not self.dir_in: + K.clear_session() return text_regions_p_true, erosion_hurts, polygons_lines_xml def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -2409,7 +2527,7 @@ class Eynollah: ##print(img_only_regions.shape,'img_only_regions') ##plt.imshow(img_only_regions[:,:]) ##plt.show() - num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) + ##num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2468,7 +2586,8 @@ class Eynollah: self.logger.info("Resizing and enhancing image...") is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') - 
K.clear_session() + if not self.dir_in: + K.clear_session() scale = 1 if is_image_enhanced: if self.allow_enhancement: @@ -2492,7 +2611,8 @@ class Eynollah: scaler_h_textline = 1 # 1.2#1.2 scaler_w_textline = 1 # 0.9#1 textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline) - K.clear_session() + if not self.dir_in: + K.clear_session() if self.plotter: self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page) return textline_mask_tot_ea @@ -2554,7 +2674,8 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - K.clear_session() + if not self.dir_in: + K.clear_session() self.logger.info("num_col_classifier: %s", num_col_classifier) @@ -2619,8 +2740,8 @@ class Eynollah: pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - - K.clear_session() + if not self.dir_in: + K.clear_session() self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables @@ -2651,7 +2772,8 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier, self.tables, pixel_lines) - K.clear_session() + if not self.dir_in: + K.clear_session() gc.collect() if num_col_classifier>=3: @@ -2718,22 +2840,24 @@ class Eynollah: text_regions_p[:, :][text_regions_p[:, :] == 2] = 5 text_regions_p[:, :][text_regions_p[:, :] == 3] = 6 text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 - - K.clear_session() + if not self.dir_in: + K.clear_session() image_page = image_page.astype(np.uint8) regions_fully, regions_fully_only_drop = self.extract_text_regions(image_page, True, cols=num_col_classifier) text_regions_p[:,:][regions_fully[:,:,0]==6]=6 regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 - K.clear_session() + if not self.dir_in: + K.clear_session() # plt.imshow(regions_fully[:,:,0]) # plt.show() regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) # plt.imshow(regions_fully[:,:,0]) # plt.show() - K.clear_session() + if not self.dir_in: + K.clear_session() regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) # plt.imshow(regions_fully_np[:,:,0]) # plt.show() @@ -2744,7 +2868,8 @@ class Eynollah: # plt.imshow(regions_fully_np[:,:,0]) # plt.show() - K.clear_session() + if not self.dir_in: + K.clear_session() # plt.imshow(regions_fully[:,:,0]) # plt.show() regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) @@ -2769,12 +2894,18 @@ class Eynollah: regions_without_separators_d = None if not self.tables: regions_without_separators = (text_regions_p[:, :] == 1) * 1 - - K.clear_session() + if not self.dir_in: + K.clear_session() img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') return 
polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables + + def our_load_model(self, model_file): + + model = load_model(model_file, compile=False) + + return model def run(self): """ @@ -2782,278 +2913,270 @@ class Eynollah: """ self.logger.debug("enter run") - t0 = time.time() - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) - - self.logger.info("Enhancing took %.1fs ", time.time() - t0) + t0_tot = time.time() - t1 = time.time() - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_from_xy_2models_light(img_res, is_image_enhanced, num_col_classifier) + if not self.dir_in: + self.ls_imgs = [1] + + for img_name in self.ls_imgs: + t0 = time.time() + if self.dir_in: + self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + + self.logger.info("Enhancing took %.1fs ", time.time() - t0) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - self.logger.info('cont_page %s', cont_page) + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_from_xy_2models_light(img_res, is_image_enhanced, num_col_classifier) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + else: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], []) - self.logger.info("Job done in %.1fs", time.time() - t1) - return pcgts + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ 
+ self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) + + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], []) + self.logger.info("Job done in %.1fs", time.time() - t1) + return pcgts - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() + #plt.imshow(table_prediction) + #plt.show() - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - - if self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts) - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - 
self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > min_con_area] - - index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents]) - areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - - areas_cnt_text_d = np.array([cv2.contourArea(contours_only_text_parent_d[j]) for j in range(len(contours_only_text_parent_d))]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d=np.argsort(areas_cnt_text_d) - contours_only_text_parent_d=list(np.array(contours_only_text_parent_d)[index_con_parents_d] ) - areas_cnt_text_d=list(np.array(areas_cnt_text_d)[index_con_parents_d] ) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + if self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts) + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + + min_con_area = 0.000005 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, 
cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz] > min_con_area] + areas_cnt_text_parent = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > min_con_area] + + index_con_parents = np.argsort(areas_cnt_text_parent) + contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents]) + areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + + areas_cnt_text_d = np.array([cv2.contourArea(contours_only_text_parent_d[j]) for j in range(len(contours_only_text_parent_d))]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d=np.argsort(areas_cnt_text_d) + contours_only_text_parent_d=list(np.array(contours_only_text_parent_d)[index_con_parents_d] ) + areas_cnt_text_d=list(np.array(areas_cnt_text_d)[index_con_parents_d] ) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + 
(h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() + else: + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] + else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > min_con_area] - - index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents]) - areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) - else: - pass - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - - if not self.curved_line: + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [contours_only_text_parent[jz] for jz in 
range(len(contours_only_text_parent)) if areas_cnt_text[jz] > min_con_area] + areas_cnt_text_parent = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > min_con_area] + + index_con_parents = np.argsort(areas_cnt_text_parent) + contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents]) + areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) + # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + else: + pass if self.light_version: - slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) else: - slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - else: - - scale_param = 1 - all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_texline_polygons = small_textlines_to_parent_adherence2(all_found_texline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_texline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_texline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - K.clear_session() - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con]) - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, 
slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered) + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + + if not self.curved_line: + if self.light_version: + slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + else: + slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: - contours_only_text_parent_d_ordered = None - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered) + + scale_param = 1 + all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_texline_polygons = small_textlines_to_parent_adherence2(all_found_texline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_texline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_texline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + if not self.dir_in: + K.clear_session() + + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con]) + if self.light_version: + 
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered) - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) + if not self.dir_in: + K.clear_session() + + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_texline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) + pixel_lines = 6 + - K.clear_session() + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, 
self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_texline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - # print(len(contours_only_text_parent_h),len(contours_only_text_parent_h_d_ordered),'contours_only_text_parent_h') - pixel_lines = 6 - + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + - if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables) + + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() + if self.full_layout: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - # print(peaks_neg_fin,peaks_neg_fin_d,'num_col2') - # print(splitter_y_new,splitter_y_new_d,'num_col_classifier') - # print(matrix_of_lines_ch.shape,matrix_of_lines_ch_d.shape,'matrix_of_lines_ch') - - if num_col_classifier >= 3: + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, 
polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) + self.logger.info("Job done in %.1fs", time.time() - t0) + ##return pcgts + else: + contours_only_text_parent_h = None if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - - #regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0) - #meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1] - #first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0) - #last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0) - #last_nonzero = len(regions_without_separators_0) - last_nonzero - - #random_pixels_for_image = np.random.randn(regions_without_separators.shape[0], regions_without_separators.shape[1]) - #random_pixels_for_image[random_pixels_for_image < -0.5] = 0 - #random_pixels_for_image[random_pixels_for_image != 0] = 1 - #regions_without_separators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 5)] = 1 - - #regions_without_separators[:, 0:first_nonzero] = 0 - #regions_without_separators[:, last_nonzero:] = 0 + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - - #regions_without_separators_0 = regions_without_separators_d[:, :].sum(axis=0) - #meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1] - #first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0) - #last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0) - #last_nonzero = len(regions_without_separators_0) - last_nonzero - - #random_pixels_for_image = np.random.randn(regions_without_separators_d.shape[0], regions_without_separators_d.shape[1]) - #random_pixels_for_image[random_pixels_for_image < -0.5] = 0 - #random_pixels_for_image[random_pixels_for_image != 0] = 1 - ##regions_without_separators_d[(random_pixels_for_image[:, :] == 1) & (text_regions_p_1_n[:, :] == 5)] = 1 - - #regions_without_separators_d[:, 0:first_nonzero] = 0 - #regions_without_separators_d[:, last_nonzero:] = 0 - - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables) - else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables) - - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - - if self.full_layout: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - - pcgts = 
self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) - self.logger.info("Job done in %.1fs", time.time() - t0) - return pcgts - else: - contours_only_text_parent_h = None - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_texline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) - self.logger.info("Job done in %.1fs", time.time() - t0) - return pcgts + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_texline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) + self.logger.info("Job done in %.1fs", time.time() - t0) + ##return pcgts + self.writer.write_pagexml(pcgts) + #self.logger.info("Job done in %.1fs", time.time() - t0) + self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index 2533455..da14139 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -797,6 +797,76 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): return layout_in_patch def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered): + + cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent) + + length_con=x_max_main-x_min_main + height_con=y_max_main-y_min_main + + + + all_found_texline_polygons_main=[] + all_found_texline_polygons_head=[] + + all_box_coord_main=[] + all_box_coord_head=[] + + slopes_main=[] + slopes_head=[] + + contours_only_text_parent_main=[] + contours_only_text_parent_head=[] + + contours_only_text_parent_main_d=[] + contours_only_text_parent_head_d=[] + + for ii in range(len(contours_only_text_parent)): + con=contours_only_text_parent[ii] + img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) + img = cv2.fillPoly(img, pts=[con], color=(255, 255, 
255)) + + + + all_pixels=((img[:,:,0]==255)*1).sum() + + pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() + pixels_main=all_pixels-pixels_header + + + if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 + contours_only_text_parent_head.append(con) + if contours_only_text_parent_d_ordered is not None: + contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) + all_box_coord_head.append(all_box_coord[ii]) + slopes_head.append(slopes[ii]) + all_found_texline_polygons_head.append(all_found_texline_polygons[ii]) + else: + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 + contours_only_text_parent_main.append(con) + if contours_only_text_parent_d_ordered is not None: + contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) + all_box_coord_main.append(all_box_coord[ii]) + slopes_main.append(slopes[ii]) + all_found_texline_polygons_main.append(all_found_texline_polygons[ii]) + + #print(all_pixels,pixels_main,pixels_header) + + return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d + + +def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_texline_polygons,slopes,contours_only_text_parent_d_ordered): + + ### to make it faster + h_o = regions_model_1.shape[0] + w_o = regions_model_1.shape[1] + + regions_model_1 = cv2.resize(regions_model_1, (int(regions_model_1.shape[1]/3.), int(regions_model_1.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) + regions_model_full = cv2.resize(regions_model_full, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) + contours_only_text_parent = [ (i/3.).astype(np.int32) for i in contours_only_text_parent] + + ### + cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent) length_con=x_max_main-x_min_main @@ -853,8 +923,14 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions - #plt.imshow(img[:,:,0]) - #plt.show() + ### to make it faster + + regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST) + #regions_model_full = cv2.resize(img, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) + contours_only_text_parent_head = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_head] + contours_only_text_parent_main = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_main] + ### + return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_texline_polygons_main,all_found_texline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col): diff --git a/qurator/eynollah/utils/contour.py b/qurator/eynollah/utils/contour.py index 6b81391..b29b5b6 100644 --- a/qurator/eynollah/utils/contour.py +++ b/qurator/eynollah/utils/contour.py @@ -3,7 +3,8 @@ import numpy as np from shapely 
import geometry from .rotate import rotate_image, rotation_image_new - +from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Pool def contours_in_same_horizon(cy_main_hor): X1 = np.zeros((len(cy_main_hor), len(cy_main_hor))) X2 = np.zeros((len(cy_main_hor), len(cy_main_hor))) @@ -147,6 +148,96 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): return contours_imgs +def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first): + cnts_org_per_each_subprocess = [] + index_by_text_region_contours = [] + for mv in range(len(contours_per_process)): + index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) + + img_copy = np.zeros(img.shape) + img_copy = cv2.fillPoly(img_copy, pts=[contours_per_process[mv]], color=(1, 1, 1)) + + img_copy = rotation_image_new(img_copy, -slope_first) + + img_copy = img_copy.astype(np.uint8) + imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) + + + cnts_org_per_each_subprocess.append(cont_int[0]) + + queue_of_all_params.put([ cnts_org_per_each_subprocess, index_by_text_region_contours]) + + +def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): + + num_cores = cpu_count() + queue_of_all_params = Queue() + + processes = [] + nh = np.linspace(0, len(cnts), num_cores + 1) + indexes_by_text_con = np.array(range(len(cnts))) + for i in range(num_cores): + contours_per_process = cnts[int(nh[i]) : int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] + + processes.append(Process(target=do_work_of_contours_in_image, args=(queue_of_all_params, contours_per_process, indexes_text_con_per_process, img,slope_first ))) + for i in range(num_cores): + processes[i].start() + cnts_org = [] + all_index_text_con = [] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + contours_for_sub_process = list_all_par[0] + indexes_for_sub_process = list_all_par[1] + for j in range(len(contours_for_sub_process)): + cnts_org.append(contours_for_sub_process[j]) + all_index_text_con.append(indexes_for_sub_process[j]) + for i in range(num_cores): + processes[i].join() + + print(all_index_text_con) + return cnts_org +def loop_contour_image(index_l, cnts,img, slope_first): + img_copy = np.zeros(img.shape) + img_copy = cv2.fillPoly(img_copy, pts=[cnts[index_l]], color=(1, 1, 1)) + + # plt.imshow(img_copy) + # plt.show() + + # print(img.shape,'img') + img_copy = rotation_image_new(img_copy, -slope_first) + ##print(img_copy.shape,'img_copy') + # plt.imshow(img_copy) + # plt.show() + + img_copy = img_copy.astype(np.uint8) + imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) + # print(np.shape(cont_int[0])) + return cont_int[0] + +def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first): + + cnts_org = [] + # print(cnts,'cnts') + with Pool(cpu_count()) as p: + cnts_org = 
p.starmap(loop_contour_image, [(index_l,cnts, img,slope_first) for index_l in range(len(cnts))]) + + print(len(cnts_org),'lendiha') + + return cnts_org + def get_textregion_contours_in_org_image(cnts, img, slope_first): cnts_org = [] @@ -175,11 +266,43 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): # print(np.shape(cont_int[0])) cnts_org.append(cont_int[0]) - # print(cnts_org,'cnts_org') + return cnts_org + +def get_textregion_contours_in_org_image_light(cnts, img, slope_first): + + h_o = img.shape[0] + w_o = img.shape[1] + + img = cv2.resize(img, (int(img.shape[1]/3.), int(img.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) + ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) + #cnts = cnts/2 + cnts = [(i/ 3).astype(np.int32) for i in cnts] + cnts_org = [] + #print(cnts,'cnts') + for i in range(len(cnts)): + img_copy = np.zeros(img.shape) + img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) + + # plt.imshow(img_copy) + # plt.show() + + # print(img.shape,'img') + img_copy = rotation_image_new(img_copy, -slope_first) + ##print(img_copy.shape,'img_copy') + # plt.imshow(img_copy) + # plt.show() + + img_copy = img_copy.astype(np.uint8) + imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) + # print(np.shape(cont_int[0])) + cnts_org.append(cont_int[0]*3) - # sys.exit() - # self.y_shift = np.abs(img_copy.shape[0] - img.shape[0]) - # self.x_shift = np.abs(img_copy.shape[1] - img.shape[1]) return cnts_org def return_contours_of_interested_textline(region_pre_p, pixel): From 2eacb9a8ec4ba1d2150e3c984d6b0b8678a07cd9 Mon Sep 17 00:00:00 2001 From: vahid Date: Mon, 4 Apr 2022 20:34:59 -0400 Subject: [PATCH 05/67] renaming the models --- qurator/eynollah/eynollah.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b3fca7b..c980866 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -144,18 +144,18 @@ class Eynollah: self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models - self.model_dir_of_enhancement = dir_models + "/model_enhancement.h5" - self.model_dir_of_binarization = dir_models + "/model_bin_sbb_ens.h5" - self.model_dir_of_col_classifier = dir_models + "/model_scale_classifier.h5" - self.model_region_dir_p = dir_models + "/model_main_covid19_lr5-5_scale_1_1_great.h5" - self.model_region_dir_p2 = dir_models + "/model_main_home_corona3_rot.h5" - self.model_region_dir_fully_np = dir_models + "/model_no_patches_class0_30eopch.h5" - self.model_region_dir_fully = dir_models + "/model_3up_new_good_no_augmentation.h5" - self.model_page_dir = dir_models + "/model_page_mixed_best.h5" - self.model_region_dir_p_ens = dir_models + "/model_ensemble_s.h5" - self.model_region_dir_p_ens_light = dir_models + "/model_11.h5" - self.model_textline_dir = dir_models + "/model_textline_newspapers.h5" - self.model_tables = dir_models + "/model_tables_ens_mixed_new_2.h5" + self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425.h5" + self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425.h5" + self.model_dir_of_col_classifier = dir_models + 
"/eynollah-column-classifier_20210425.h5" + self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425.h5" + self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425.h5" + self.model_region_dir_fully_np = dir_models + "/eynollah-full-regions-1column_20210425.h5" + self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425.h5" + self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425.h5" + self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425.h5" + self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314.h5" + self.model_textline_dir = dir_models + "/eynollah-textline_20210425.h5" + self.model_tables = dir_models + "/eynollah-tables_20210319.h5" if dir_in and light_version: config = tf.compat.v1.ConfigProto() From 8d19c4c6320419125f3e178a5d990217522fd667 Mon Sep 17 00:00:00 2001 From: vahid Date: Mon, 4 Apr 2022 20:45:12 -0400 Subject: [PATCH 06/67] updating readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7673954..506e93d 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,8 @@ eynollah \ -ho \ -sl \ -ep +-light +-di ``` From 94c3b0fc286c6c7b051eea2501ca2478a6d118c4 Mon Sep 17 00:00:00 2001 From: vahid Date: Mon, 4 Apr 2022 20:48:21 -0400 Subject: [PATCH 07/67] updating readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 506e93d..afafb3a 100644 --- a/README.md +++ b/README.md @@ -94,8 +94,8 @@ eynollah \ -ho \ -sl \ -ep --light --di +-light +-di ``` From e564451861d9c6ee3db5e859d87b14354a6db44c Mon Sep 17 00:00:00 2001 From: vahid Date: Mon, 4 Apr 2022 21:13:21 -0400 Subject: [PATCH 08/67] updating readme --- README.md | 176 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 113 insertions(+), 63 deletions(-) diff --git a/README.md b/README.md index afafb3a..0232db7 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,68 @@ # Eynollah -> Document Layout Analysis +> Perform document layout analysis (segmentation) from image data and return the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML). ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) -## Introduction -This tool performs document layout analysis (segmentation) from image data and returns the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML). +## Installation +`pip install .` or + +`pip install . -e` for editable installation + +Alternatively, you can also use `make` with these targets: + +`make install` or + +`make install-dev` for editable installation + +### Models + +In order to run this tool you need trained models. You can download our pretrained models from [qurator-data.de](https://qurator-data.de/eynollah/). -It can currently detect the following layout classes/elements: +Alternatively, running `make models` will download and extract models to `$(PWD)/models_eynollah`. + +### Training + +In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation). + +## Usage + +The command-line interface can be called like this: + +```sh +eynollah \ +-i \ +-o \ +-m \ +-fl \ +-ae \ +-as \ +-cl \ +-si \ +-sd \ +-sa \ +-tab \ +-ib \ +-ho \ +-sl \ +-ep +-light +-di + +``` + +The tool performs better with RGB images than greyscale/binarized images. 
+ 
+## Documentation
+ 
+<details>
+  <summary>click to expand/collapse</summary>
+ 
+### Region types
+ 
+<details>
+  <summary>click to expand/collapse</summary>
+ +Eynollah can currently be used to detect the following region types/elements: * [Border](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_BorderType.html) * [Textregion](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_TextRegionType.html) * [Textline](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_TextLineType.html) @@ -15,9 +71,16 @@ It can currently detect the following layout classes/elements: * [Marginalia](https://ocr-d.de/en/gt-guidelines/trans/lyMarginalie.html) * [Initial (Drop Capital)](https://ocr-d.de/en/gt-guidelines/trans/lyInitiale.html) -In addition, the tool can be used to detect the _[ReadingOrder](https://ocr-d.de/en/gt-guidelines/trans/lyLeserichtung.html)_ of regions. The final goal is to feed the output to an OCR model. +In addition, the tool can detect the [ReadingOrder](https://ocr-d.de/en/gt-guidelines/trans/lyLeserichtung.html) of regions. The final goal is to feed the output to an OCR model. + +
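Since the results are written as PAGE-XML, a quick way to check which of the region types listed above were detected is to count the region elements in an output file. The following is a minimal sketch, not part of the original README; the PAGE namespace version is an assumption and may need to be adapted to the one your Eynollah version writes:

```python
# Count region elements (TextRegion, ImageRegion, SeparatorRegion, ...) in a
# PAGE-XML file produced by Eynollah.
from collections import Counter
from lxml import etree

# Assumed PAGE namespace; adjust to the version your output actually uses.
NS = {"pc": "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"}

def count_region_types(page_xml_path):
    tree = etree.parse(page_xml_path)
    counts = Counter()
    for region in tree.iterfind(".//pc:Page/*", namespaces=NS):
        counts[etree.QName(region).localname] += 1  # e.g. "TextRegion"
    return counts

print(count_region_types("example_page.xml"))
```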
-The tool uses a combination of various models and heuristics (see flowchart below for the different stages and how they interact):
+### Method description
+
+<details>
+  <summary>click to expand/collapse</summary>
+ 
+Eynollah uses a combination of various models and heuristics (see flowchart below for the different stages and how they interact):
 * [Border detection](https://github.com/qurator-spk/eynollah#border-detection)
 * [Layout detection](https://github.com/qurator-spk/eynollah#layout-detection)
 * [Textline detection](https://github.com/qurator-spk/eynollah#textline-detection)
@@ -29,22 +92,22 @@ The first three stages are based on [pixel-wise segmentation](https://github.com
 
 ![](https://user-images.githubusercontent.com/952378/100619946-1936f680-331e-11eb-9297-6e8b4cab3c16.png)
 
-## Border detection
+#### Border detection
 For the purpose of text recognition (OCR) and in order to avoid noise being introduced from texts outside the printspace, one first needs to detect the border of the printed frame. This is done by a binary pixel-wise-segmentation model trained on a dataset of 2,000 documents where about 1,200 of them come from the [dhSegment](https://github.com/dhlab-epfl/dhSegment/) project (you can download the dataset from [here](https://github.com/dhlab-epfl/dhSegment/releases/download/v0.2/pages.zip)) and the remainder having been annotated in SBB. For border detection, the model needs to be fed with the whole image at once rather than separated in patches.
 
-## Layout detection
+#### Layout detection
 As a next step, text regions need to be identified by means of layout detection. Again a pixel-wise segmentation model was trained on 131 labeled images from the SBB digital collections, including some data augmentation. Since the target of this tool is historical documents, we consider as main region types text regions, separators, images, tables and background - each with their own subclasses, e.g. in the case of text regions, subclasses like header/heading, drop capital, main body text etc. While it would be desirable to detect and classify each of these classes in a granular way, there are also limitations due to having a suitably large and balanced training set. Accordingly, the current version of this tool is focussed on the main region types background, text region, image and separator.
 
-## Textline detection
+#### Textline detection
 In a subsequent step, binary pixel-wise segmentation is used again to classify pixels in a document that constitute textlines. For textline segmentation, a model was initially trained on documents with only one column/block of text and some augmentation with regard to scaling. By fine-tuning the parameters also for multi-column documents, additional training data was produced that resulted in a much more robust textline detection model.
 
-## Image enhancement
+#### Image enhancement
 This is an image-to-image model whose input is a low-quality image and whose label is the corresponding original image. Since no ground truth (GT) was available for this task, we artificially decreased the quality of documents in SBB and then fed these image pairs to the model for training.
 
-## Scale classification
+#### Scale classification
 This is simply an image classifier that classifies images according to their scale or, more precisely, their number of columns.
 
-## Heuristic methods
+### Heuristic methods
 Some heuristic methods are also employed to further improve the model predictions:
 * After border detection, the largest contour is determined by a bounding box, and the image cropped to these coordinates.
 * For text region detection, the image is scaled up to make it easier for the model to detect background space between text regions. 
@@ -53,55 +116,39 @@ Some heuristic methods are also employed to further improve the model prediction * After deskewing, a calculation of the pixel distribution on the X-axis allows the separation of textlines (foreground) and background pixels. * Finally, using the derived coordinates, bounding boxes are determined for each textline. -## Light version -layout detection is implemented in lower scale and with only one model. - -## Installation -`pip install .` or +
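The deskewing and pixel-distribution heuristics described above can be illustrated with a short sketch. This is a simplified stand-in, not Eynollah's actual implementation: after deskewing, summing the foreground pixels of each row gives a profile whose peaks mark textlines and whose valleys mark background.

```python
# Illustrative projection-profile sketch for separating textlines
# (foreground) from background in a deskewed, binarized region.
import numpy as np
from scipy.signal import find_peaks

def candidate_textline_rows(binary_region, min_line_distance=10):
    """binary_region: 2D array with 1 = text pixel, 0 = background."""
    profile = binary_region.sum(axis=1)  # foreground pixel count per row
    peaks, _ = find_peaks(profile, distance=min_line_distance)
    return peaks  # row indices of candidate textline centres

demo = np.zeros((60, 40), dtype=np.uint8)
demo[10:14, :] = 1  # fake textline
demo[30:34, :] = 1  # another fake textline
print(candidate_textline_rows(demo))  # roughly [11, 31]
```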
+ 
+### Model description
 
-`pip install . -e` for editable installation
+<details>
+  <summary>click to expand/collapse</summary>
+ 
+TODO
 
-Alternatively, you can also use `make` with these targets:
-
-`make install` or
-
-`make install-dev` for editable installation
+</details>
+ 
+### How to use
 
-### Models
+<details>
+  <summary>click to expand/collapse</summary>
+ 
+First, the tool makes use of up to 9 trained models which are responsible for different operations like size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection. That does not mean that all 9 models are always required for every document. Based on the document characteristics and parameters specified, different scenarios can be applied.
 
-In order to run this tool you also need trained models. You can download our pretrained models from [qurator-data.de](https://qurator-data.de/eynollah/).
+* If none of the parameters is set to `true`, the tool will perform a layout detection of main regions (background, text, images, separators and marginals). An advantage of this tool is that it tries to extract main text regions separately as much as possible.
 
-Alternatively, running `make models` will download and extract models to `$(PWD)/models_eynollah`.
+* If you set the `-ae` (**a**llow image **e**nhancement) parameter to `true`, the tool will first check the ppi (pixel-per-inch) of the image and, if it is less than 300, the tool will resize it and only then will image enhancement occur. Image enhancement can also take place without this option, but by setting this option to `true`, the layout xml data (e.g. coordinates) will be based on the resized and enhanced image instead of the original image.
 
-## Usage
+* For some documents, while the quality is good, their scale is very large, and the performance of the tool decreases. In such cases you can set `-as` (**a**llow **s**caling) to `true`. With this option enabled, the tool will try to rescale the image and only then will the layout detection process begin.
 
-The basic command-line interface can be called like this:
+* If you care about drop capitals (initials) and headings, you can set `-fl` (**f**ull **l**ayout) to `true`. With this setting, the tool can currently distinguish 7 document layout classes/elements.
 
-```sh
-eynollah \
--i \
--o \
--m \
--fl \
--ae \
--as \
--cl \
--si \
--sd \
--sa \
--tab \
--ib \
--ho \
--sl \
--ep
--light
--di
+* In cases where the document includes curved headers or curved lines, rectangular bounding boxes for textlines will not be a great option. In such cases it is strongly recommended to set the flag `-cl` (**c**urved **l**ines) to `true` to find contours of curved lines instead of rectangular bounding boxes. Be advised that enabling this option increases the processing time of the tool.
 
-```
+* To crop and save image regions inside the document, set the parameter `-si` (**s**ave **i**mages) to `true` and provide a directory path to store the extracted images.
 
-The tool does accept and works better on original images (RGB format) than binarized images.
+* This tool is actively being developed. If problems occur, or the performance does not meet your expectations, we welcome your feedback via [issues](https://github.com/qurator-spk/eynollah/issues).
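For readers who prefer calling Eynollah from Python rather than the CLI, here is a minimal, hedged sketch that mirrors what the command-line wrapper does. It is not part of the original README; the keyword names follow the constructor used in these patches, and all paths are placeholders:

```python
from qurator.eynollah.eynollah import Eynollah

# Programmatic counterpart of the CLI options -m, -i, -o and -light.
eynollah = Eynollah(
    dir_models="models_eynollah",  # -m
    image_filename="page.png",     # -i (or dir_in="scans/" for -di)
    dir_out="pagexml",             # -o
    light_version=True,            # -light
)
eynollah.run()  # writes the PAGE-XML result to dir_out
```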
-### `--full-layout` vs `--no-full-layout` +#### `--full-layout` vs `--no-full-layout` Here are the difference in elements detected depending on the `--full-layout`/`--no-full-layout` command line flags: @@ -116,20 +163,23 @@ Here are the difference in elements detected depending on the `--full-layout`/`- | marginals / text line | x | x | | image region | x | x | -### How to use +#### Use as OCR-D processor -First, this model makes use of up to 9 trained models which are responsible for different operations like size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection.That does not mean that all 9 models are always required for every document. Based on the document characteristics and parameters specified, different scenarios can be applied. +Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor. In this case, the source image file group with (preferably) RGB images should be used as input like this: -* If none of the parameters is set to `true`, the tool will perform a layout detection of main regions (background, text, images, separators and marginals). An advantage of this tool is that it tries to extract main text regions separately as much as possible. - -* If you set `-ae` (**a**llow image **e**nhancement) parameter to `true`, the tool will first check the ppi (pixel-per-inch) of the image and when it is less than 300, the tool will resize it and only then image enhancement will occur. Image enhancement can also take place without this option, but by setting this option to `true`, the layout xml data (e.g. coordinates) will be based on the resized and enhanced image instead of the original image. - -* For some documents, while the quality is good, their scale is very large, and the performance of tool decreases. In such cases you can set `-as` (**a**llow **s**caling) to `true`. With this option enabled, the tool will try to rescale the image and only then the layout detection process will begin. - -* If you care about drop capitals (initials) and headings, you can set `-fl` (**f**ull **l**ayout) to `true`. With this setting, the tool can currently distinguish 7 document layout classes/elements. - -* In cases where the document includes curved headers or curved lines, rectangular bounding boxes for textlines will not be a great option. In such cases it is strongly recommended setting the flag `-cl` (**c**urved **l**ines) to `true` to find contours of curved lines instead of rectangular bounding boxes. Be advised that enabling this option increases the processing time of the tool. +`ocrd-eynollah-segment -I OCR-D-IMG -O SEG-LINE -P models` + +In fact, the image referenced by `@imageFilename` in PAGE-XML is passed on directly to Eynollah as a processor, so that e.g. calling -* To crop and save image regions inside the document, set the parameter `-si` (**s**ave **i**mages) to true and provide a directory path to store the extracted images. +`ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models` + +would still use the original (RGB) image despite any binarization that may have occured in previous OCR-D processing steps -* This tool is actively being developed. If problems occur, or the performance does not meet your expectations, we welcome your feedback via [issues](https://github.com/qurator-spk/eynollah/issues). + #### Eynollah "light" + + TODO + +
+</details>
+ From 3871e22c358093cb408fac68e8f62ff575658dd9 Mon Sep 17 00:00:00 2001 From: vahid Date: Tue, 12 Apr 2022 00:58:20 +0200 Subject: [PATCH 09/67] how the models are trained --- README.md | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0232db7..7438427 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,24 @@ Some heuristic methods are also employed to further improve the model prediction
  <summary>click to expand/collapse</summary>
-TODO +#### Enhancement model: +The image enhancement model is again an image-to-image model, trained on document images with low quality and GT of corresponding images with higher quality. For training the image enhancement model, a total of 1127 document images underwent 11 different downscaling processes and consequently 11 different qualities for each image were derived. The resulting images were cropped into patches of 672*672 pixels. Adam is used as an optimizer and the learning rate is 1e-4. Scaling is the only augmentation applied for training. The model is trained with a batch size of 2 and for 5 epochs. + +#### Classifier model: +In order to obtain high quality results, it is beneficial to scale the document image to the same scale of the images in the training dataset that the models were trained on. The classifier model predicts the number of columns in a document by creating a training set for that purpose with manual classification of all documents into six classes with either one, two, three, four, five, or six and more columns respectively. Classifier model is a ResNet50+2 dense layers on top. The input size of model is 448*448 and Adam is used as an optimizer and the learning rate is 1e-4. Model is trained for 300 epochs. + +#### Page extractor model: +This a deep learning model which helps to crop the page borders by using a pixel-wise segmentation method. In case of page extraction it is necessary to train the model on the entire (document) image, i.e. full images are resized to the input size of the model (no patches). For training, the model is fed with entire images from the 2820 samples of the extended training set. The input size of the the page extraction model is 448*448 pixels. Adam is used as an optimizer and the learning rate is 1e-6. The model is trained with a batch size of 4 and for 30 epochs. + +#### Early layout model: +The early layout detection model detects only the main and recursive regions in a document like background, text regions, separators and images. In the case of early layout segmentation, we used 381 pages to train the model. The model is fed with patches of size 448*672 pixels. Adam is used as an optimizer and the learning rate is 1e-4. Two models were trained, one with scale augmentation and another one without any augmentation. Both models were trained for 12 epochs and with a batch size of 3. Categorical cross entropy is used as a loss function. + +#### Full layout model: +By full layout detection we have added two more elements of a document structure, drop capitals and headings, onto early layout elements. For the secondary layout segmentation we have trained two models. One is trained with 355 pages containing 3 or more columns and in patches with a size of 896*896 pixels. The other model is trained on 634 pages that have only one column. The second model is fed with the entire image with input size +of 896 * 896 pixels (not in patches). Adam is used as an optimizer and the learning rate is 1e-4. Then both models are trained for 8 epochs with a batch size of 1. Soft dice is used as the loss function. + +#### Text line segmentation model: +For text line segmentation, 342 pages were used for training. The model is trained in patches with the size of 448*672. Adam is used as an optimizer and the learning rate is 1e-4. The training set is augmented with scaling and rotation. The model is trained only for 1 epoch with a batch size of 3. Soft dice is again used as the loss function.
@@ -177,7 +194,7 @@ would still use the original (RGB) image despite any binarization that may have #### Eynollah "light" - TODO + Eynollah light has used a faster method to predict and extract early layout. On other hand with light version deskewing is not applied for any text region and in return it is done for the whole document once. The other option that users have with light version is that instead of image name a folder of images can be given as input and in this case all models will be loaded and then processing for all images will be implemented. This step accelerates process of document analysis. From 3bbbeecfec20ec5a5b1d0113a1a2621e056f2bb3 Mon Sep 17 00:00:00 2001 From: vahid Date: Wed, 20 Apr 2022 15:31:10 +0200 Subject: [PATCH 10/67] all options are enabled for light version --- qurator/eynollah/eynollah.py | 39 +++++++++++++++++++++++++----------- qurator/eynollah/plot.py | 8 ++++---- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c980866..48a640c 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -119,6 +119,12 @@ class Eynollah: self.image_filename = image_filename self.dir_out = dir_out self.dir_in = dir_in + self.dir_of_all = dir_of_all + self.dir_of_deskewed = dir_of_deskewed + self.dir_of_deskewed = dir_of_deskewed + self.dir_of_cropped_images=dir_of_cropped_images + self.dir_of_layout=dir_of_layout + self.enable_plotting = enable_plotting self.allow_enhancement = allow_enhancement self.curved_line = curved_line self.full_layout = full_layout @@ -128,14 +134,14 @@ class Eynollah: self.headers_off = headers_off self.light_version = light_version self.pcgts = pcgts - self.plotter = None if not enable_plotting else EynollahPlotter( - dir_out=self.dir_out, - dir_of_all=dir_of_all, - dir_of_deskewed=dir_of_deskewed, - dir_of_cropped_images=dir_of_cropped_images, - dir_of_layout=dir_of_layout, - image_filename_stem=Path(Path(image_filename).name).stem) if not dir_in: + self.plotter = None if not enable_plotting else EynollahPlotter( + dir_out=self.dir_out, + dir_of_all=dir_of_all, + dir_of_deskewed=dir_of_deskewed, + dir_of_cropped_images=dir_of_cropped_images, + dir_of_layout=dir_of_layout, + image_filename_stem=Path(Path(image_filename).name).stem) self.writer = EynollahXmlWriter( dir_out=self.dir_out, image_filename=self.image_filename, @@ -208,6 +214,14 @@ class Eynollah: self._imgs = self._cache_images(image_filename=image_filename) self.image_filename = image_filename + self.plotter = None if not self.enable_plotting else EynollahPlotter( + dir_out=self.dir_out, + dir_of_all=self.dir_of_all, + dir_of_deskewed=self.dir_of_deskewed, + dir_of_cropped_images=self.dir_of_cropped_images, + dir_of_layout=self.dir_of_layout, + image_filename_stem=Path(Path(image_filename).name).stem) + self.writer = EynollahXmlWriter( dir_out=self.dir_out, image_filename=self.image_filename, @@ -1396,7 +1410,7 @@ class Eynollah: queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess]) def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): - self.logger.debug('enter do_work_of_slopes_new') + self.logger.debug('enter 
do_work_of_slopes_new_light') slopes_per_each_subprocess = [] bounding_box_of_textregion_per_each_subprocess = [] textlines_rectangles_per_each_subprocess = [] @@ -1566,8 +1580,8 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) - def get_regions_from_xy_2models_light(self,img,is_image_enhanced, num_col_classifier): - self.logger.debug("enter get_regions_from_xy_2models") + def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): + self.logger.debug("enter get_regions_light_v") erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] @@ -2929,7 +2943,7 @@ class Eynollah: t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_from_xy_2models_light(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ @@ -3179,4 +3193,5 @@ class Eynollah: ##return pcgts self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) - self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) + if self.dir_in: + self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/plot.py b/qurator/eynollah/plot.py index b22c8f1..ec4e290 100644 --- a/qurator/eynollah/plot.py +++ b/qurator/eynollah/plot.py @@ -74,8 +74,8 @@ class EynollahPlotter(): if self.dir_of_layout is not None: values = np.unique(text_regions_p[:, :]) # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] - pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"] - values_indexes = [0, 1, 2, 8, 4, 5, 6] + pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator", "Tables"] + values_indexes = [0, 1, 2, 8, 4, 5, 6, 10] plt.figure(figsize=(40, 40)) plt.rcParams["font.size"] = "40" im = plt.imshow(text_regions_p[:, :]) @@ -88,8 +88,8 @@ class EynollahPlotter(): if self.dir_of_all is not None: values = np.unique(text_regions_p[:, :]) # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] - pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"] - values_indexes = [0, 1, 2, 8, 4, 5, 6] + pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator", "Tables"] + values_indexes = [0, 1, 2, 8, 4, 5, 6, 10] plt.figure(figsize=(80, 40)) plt.rcParams["font.size"] = "40" plt.subplot(1, 2, 1) From 735abc43f3102e8cf35d71dff2daeffc4a1cfeac Mon Sep 17 00:00:00 2001 From: vahid Date: Thu, 28 Apr 2022 01:14:57 +0200 Subject: [PATCH 11/67] option to ignore page extraction --- qurator/eynollah/cli.py | 8 ++ qurator/eynollah/eynollah.py | 242 +++++++++++++---------------------- 2 files changed, 98 insertions(+), 152 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index ca938c4..18ea583 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -108,6 +108,12 @@ from 
qurator.eynollah.eynollah import Eynollah is_flag=True, help="if this parameter set to true, this tool would use lighter version", ) +@click.option( + "--ignore_page_extraction/--extract_page_included", + "-ipe/-epi", + is_flag=True, + help="if this parameter set to true, this tool would ignore page extraction", +) @click.option( "--log-level", "-l", @@ -132,6 +138,7 @@ def main( allow_scaling, headers_off, light_version, + ignore_page_extraction, log_level ): if log_level: @@ -161,6 +168,7 @@ def main( allow_scaling=allow_scaling, headers_off=headers_off, light_version=light_version, + ignore_page_extraction=ignore_page_extraction, ) eynollah.run() #pcgts = eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 48a640c..8957248 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -105,6 +105,7 @@ class Eynollah: allow_scaling=False, headers_off=False, light_version=False, + ignore_page_extraction=False, override_dpi=None, logger=None, pcgts=None, @@ -133,6 +134,7 @@ class Eynollah: self.allow_scaling = allow_scaling self.headers_off = headers_off self.light_version = light_version + self.ignore_page_extraction = ignore_page_extraction self.pcgts = pcgts if not dir_in: self.plotter = None if not enable_plotting else EynollahPlotter( @@ -886,169 +888,100 @@ class Eynollah: gc.collect() return prediction_true - def early_page_for_num_of_column_classification(self,img_bin): - self.logger.debug("enter early_page_for_num_of_column_classification") - if self.input_binary: - img =np.copy(img_bin) - img = img.astype(np.uint8) - else: - img = self.imread() - if not self.dir_in: - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) - img = cv2.GaussianBlur(img, (5, 5), 0) - if self.dir_in: - img_page_prediction = self.do_prediction(False, img, self.model_page) - else: - img_page_prediction = self.do_prediction(False, img, model_page) - - imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - thresh = cv2.dilate(thresh, KERNEL, iterations=3) - contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if len(contours)>0: - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - cnt = contours[np.argmax(cnt_size)] - x, y, w, h = cv2.boundingRect(cnt) - box = [x, y, w, h] - else: - box = [0, 0, img.shape[1], img.shape[0]] - croped_page, page_coord = crop_image_inside_box(box, img) - if not self.dir_in: - session_page.close() - del model_page - del session_page - K.clear_session() - gc.collect() - self.logger.debug("exit early_page_for_num_of_column_classification") - return croped_page, page_coord - def extract_page(self): self.logger.debug("enter extract_page") cont_page = [] - if not self.dir_in: - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) - img = cv2.GaussianBlur(self.image, (5, 5), 0) - if not self.dir_in: - img_page_prediction = self.do_prediction(False, img, model_page) - else: - img_page_prediction = self.do_prediction(False, img, self.model_page) - imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - thresh = cv2.dilate(thresh, KERNEL, iterations=3) - contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - if len(contours)>0: - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - cnt = contours[np.argmax(cnt_size)] - x, y, w, h = 
cv2.boundingRect(cnt) - if x <= 30: - w += x - x = 0 - if (self.image.shape[1] - (x + w)) <= 30: - w = w + (self.image.shape[1] - (x + w)) - if y <= 30: - h = h + y - y = 0 - if (self.image.shape[0] - (y + h)) <= 30: - h = h + (self.image.shape[0] - (y + h)) - - box = [x, y, w, h] + if not self.ignore_page_extraction: + if not self.dir_in: + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + img = cv2.GaussianBlur(self.image, (5, 5), 0) + if not self.dir_in: + img_page_prediction = self.do_prediction(False, img, model_page) + else: + img_page_prediction = self.do_prediction(False, img, self.model_page) + imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + thresh = cv2.dilate(thresh, KERNEL, iterations=3) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + if len(contours)>0: + cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt = contours[np.argmax(cnt_size)] + x, y, w, h = cv2.boundingRect(cnt) + if x <= 30: + w += x + x = 0 + if (self.image.shape[1] - (x + w)) <= 30: + w = w + (self.image.shape[1] - (x + w)) + if y <= 30: + h = h + y + y = 0 + if (self.image.shape[0] - (y + h)) <= 30: + h = h + (self.image.shape[0] - (y + h)) + + box = [x, y, w, h] + else: + box = [0, 0, img.shape[1], img.shape[0]] + croped_page, page_coord = crop_image_inside_box(box, self.image) + cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) + if not self.dir_in: + session_page.close() + del model_page + del session_page + K.clear_session() + gc.collect() + self.logger.debug("exit extract_page") else: - box = [0, 0, img.shape[1], img.shape[0]] - croped_page, page_coord = crop_image_inside_box(box, self.image) - cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - if not self.dir_in: - session_page.close() - del model_page - del session_page - K.clear_session() - gc.collect() - self.logger.debug("exit extract_page") + box = [0, 0, self.image.shape[1], self.image.shape[0]] + croped_page, page_coord = crop_image_inside_box(box, self.image) + cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) return croped_page, page_coord, cont_page def early_page_for_num_of_column_classification(self,img_bin): - self.logger.debug("enter early_page_for_num_of_column_classification") - if self.input_binary: - img =np.copy(img_bin) - img = img.astype(np.uint8) + if not self.ignore_page_extraction: + self.logger.debug("enter early_page_for_num_of_column_classification") + if self.input_binary: + img =np.copy(img_bin) + img = img.astype(np.uint8) + else: + img = self.imread() + if not self.dir_in: + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + img = cv2.GaussianBlur(img, (5, 5), 0) + + if self.dir_in: + img_page_prediction = self.do_prediction(False, img, self.model_page) + else: + img_page_prediction = self.do_prediction(False, img, model_page) + + imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + thresh = cv2.dilate(thresh, KERNEL, iterations=3) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + if len(contours)>0: + cnt_size = 
np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + cnt = contours[np.argmax(cnt_size)] + x, y, w, h = cv2.boundingRect(cnt) + box = [x, y, w, h] + else: + box = [0, 0, img.shape[1], img.shape[0]] + croped_page, page_coord = crop_image_inside_box(box, img) + + if not self.dir_in: + session_page.close() + del model_page + del session_page + K.clear_session() + + gc.collect() + + self.logger.debug("exit early_page_for_num_of_column_classification") else: img = self.imread() - if not self.dir_in: - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) - img = cv2.GaussianBlur(img, (5, 5), 0) - - if self.dir_in: - img_page_prediction = self.do_prediction(False, img, self.model_page) - else: - img_page_prediction = self.do_prediction(False, img, model_page) - - imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - thresh = cv2.dilate(thresh, KERNEL, iterations=3) - contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if len(contours)>0: - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - cnt = contours[np.argmax(cnt_size)] - x, y, w, h = cv2.boundingRect(cnt) - box = [x, y, w, h] - else: box = [0, 0, img.shape[1], img.shape[0]] - croped_page, page_coord = crop_image_inside_box(box, img) - - if not self.dir_in: - session_page.close() - del model_page - del session_page - K.clear_session() - - gc.collect() - - self.logger.debug("exit early_page_for_num_of_column_classification") + croped_page, page_coord = crop_image_inside_box(box, img) return croped_page, page_coord - def extract_page(self): - self.logger.debug("enter extract_page") - cont_page = [] - if not self.dir_in: - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) - img = cv2.GaussianBlur(self.image, (5, 5), 0) - if not self.dir_in: - img_page_prediction = self.do_prediction(False, img, model_page) - else: - img_page_prediction = self.do_prediction(False, img, self.model_page) - imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - thresh = cv2.dilate(thresh, KERNEL, iterations=3) - contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - if len(contours)>0: - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - cnt = contours[np.argmax(cnt_size)] - x, y, w, h = cv2.boundingRect(cnt) - if x <= 30: - w += x - x = 0 - if (self.image.shape[1] - (x + w)) <= 30: - w = w + (self.image.shape[1] - (x + w)) - if y <= 30: - h = h + y - y = 0 - if (self.image.shape[0] - (y + h)) <= 30: - h = h + (self.image.shape[0] - (y + h)) - - box = [x, y, w, h] - else: - box = [0, 0, img.shape[1], img.shape[0]] - croped_page, page_coord = crop_image_inside_box(box, self.image) - cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - if not self.dir_in: - session_page.close() - del model_page - del session_page - K.clear_session() - gc.collect() - self.logger.debug("exit extract_page") - return croped_page, page_coord, cont_page def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") @@ -2960,10 +2893,15 @@ class Eynollah: #self.logger.info('cont_page %s', cont_page) if not num_col: + print('buraya galir??') self.logger.info("No columns detected, outputting an empty PAGE-XML") pcgts = 
self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], []) self.logger.info("Job done in %.1fs", time.time() - t1) - return pcgts + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts t1 = time.time() if not self.light_version: From cd9920eea76ab1bb5538fb50785a36d451ca1146 Mon Sep 17 00:00:00 2001 From: vahid Date: Wed, 4 May 2022 17:01:42 +0200 Subject: [PATCH 12/67] extracting page --- qurator/eynollah/cli.py | 16 ++++++++++++---- qurator/eynollah/eynollah.py | 4 ++++ qurator/eynollah/plot.py | 4 ++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 18ea583..6828cc5 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -54,6 +54,12 @@ from qurator.eynollah.eynollah import Eynollah help="if a directory is given, all plots needed for documentation will be saved there", type=click.Path(exists=True, file_okay=False), ) +@click.option( + "--save_page", + "-sp", + help="if a directory is given, page crop of image will be saved there", + type=click.Path(exists=True, file_okay=False), +) @click.option( "--enable-plotting/--disable-plotting", "-ep/-noep", @@ -129,6 +135,7 @@ def main( save_layout, save_deskewed, save_all, + save_page, enable_plotting, allow_enhancement, curved_line, @@ -144,11 +151,11 @@ def main( if log_level: setOverrideLogLevel(log_level) initLogging() - if not enable_plotting and (save_layout or save_deskewed or save_all or save_images or allow_enhancement): - print("Error: You used one of -sl, -sd, -sa, -si or -ae but did not enable plotting with -ep") + if not enable_plotting and (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): + print("Error: You used one of -sl, -sd, -sa, -sp, -si or -ae but did not enable plotting with -ep") sys.exit(1) - elif enable_plotting and not (save_layout or save_deskewed or save_all or save_images or allow_enhancement): - print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -si or -ae") + elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): + print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -sp, -si or -ae") sys.exit(1) eynollah = Eynollah( image_filename=image, @@ -159,6 +166,7 @@ def main( dir_of_layout=save_layout, dir_of_deskewed=save_deskewed, dir_of_all=save_all, + dir_save_page=save_page, enable_plotting=enable_plotting, allow_enhancement=allow_enhancement, curved_line=curved_line, diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8957248..e56009b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -96,6 +96,7 @@ class Eynollah: dir_of_layout=None, dir_of_deskewed=None, dir_of_all=None, + dir_save_page=None, enable_plotting=False, allow_enhancement=False, curved_line=False, @@ -121,6 +122,7 @@ class Eynollah: self.dir_out = dir_out self.dir_in = dir_in self.dir_of_all = dir_of_all + self.dir_save_page = dir_save_page self.dir_of_deskewed = dir_of_deskewed self.dir_of_deskewed = dir_of_deskewed self.dir_of_cropped_images=dir_of_cropped_images @@ -140,6 +142,7 @@ class Eynollah: self.plotter = None if not enable_plotting else EynollahPlotter( dir_out=self.dir_out, dir_of_all=dir_of_all, + dir_save_page=dir_save_page, dir_of_deskewed=dir_of_deskewed, dir_of_cropped_images=dir_of_cropped_images, dir_of_layout=dir_of_layout, @@ -219,6 +222,7 @@ class 
Eynollah: self.plotter = None if not self.enable_plotting else EynollahPlotter( dir_out=self.dir_out, dir_of_all=self.dir_of_all, + dir_save_page=self.dir_save_page, dir_of_deskewed=self.dir_of_deskewed, dir_of_cropped_images=self.dir_of_cropped_images, dir_of_layout=self.dir_of_layout, diff --git a/qurator/eynollah/plot.py b/qurator/eynollah/plot.py index ec4e290..b01fc04 100644 --- a/qurator/eynollah/plot.py +++ b/qurator/eynollah/plot.py @@ -19,6 +19,7 @@ class EynollahPlotter(): *, dir_out, dir_of_all, + dir_save_page, dir_of_deskewed, dir_of_layout, dir_of_cropped_images, @@ -29,6 +30,7 @@ class EynollahPlotter(): ): self.dir_out = dir_out self.dir_of_all = dir_of_all + self.dir_save_page = dir_save_page self.dir_of_layout = dir_of_layout self.dir_of_cropped_images = dir_of_cropped_images self.dir_of_deskewed = dir_of_deskewed @@ -127,6 +129,8 @@ class EynollahPlotter(): def save_page_image(self, image_page): if self.dir_of_all is not None: cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page) + if self.dir_save_page is not None: + cv2.imwrite(os.path.join(self.dir_save_page, self.image_filename_stem + "_page.png"), image_page) def save_enhanced_image(self, img_res): cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem + "_enhanced.png"), img_res) From ae7c42488930f18c5d437f7c74ebec1b5a74f338 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 13 May 2022 11:44:45 +0200 Subject: [PATCH 13/67] Update eynollah.py --- qurator/eynollah/eynollah.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index e56009b..820cbd7 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2897,7 +2897,6 @@ class Eynollah: #self.logger.info('cont_page %s', cont_page) if not num_col: - print('buraya galir??') self.logger.info("No columns detected, outputting an empty PAGE-XML") pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], []) self.logger.info("Job done in %.1fs", time.time() - t1) From 01bfc3914d859a5da1e9fe2b62c481ea1a440a63 Mon Sep 17 00:00:00 2001 From: vahid Date: Thu, 19 May 2022 12:27:01 +0200 Subject: [PATCH 14/67] extracting page as an option --- qurator/eynollah/eynollah.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 820cbd7..c125b1a 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2870,6 +2870,7 @@ class Eynollah: self.ls_imgs = [1] for img_name in self.ls_imgs: + print(img_name,'img_name') t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) From 402c5339aca18101d1e182f2f672c6ef6f4ec5dd Mon Sep 17 00:00:00 2001 From: vahid Date: Fri, 22 Jul 2022 15:32:35 +0200 Subject: [PATCH 15/67] issue #77 is resolved --- qurator/eynollah/utils/__init__.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index da14139..e9f872c 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -294,7 +294,7 @@ def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x #print(args_to_be_unified,'args_to_be_unified') - return reading_orther_type,x_start_returned, x_end_returned 
,y_sep_returned,y_diff_returned,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother + return reading_orther_type,x_start_returned, x_end_returned ,y_sep_returned,y_diff_returned,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y def crop_image_inside_box(box, img_org_copy): image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] @@ -1771,7 +1771,7 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho peaks_neg_tot_tables.append(peaks_neg_tot) - reading_order_type,x_starting,x_ending,y_type_2,y_diff_type_2,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother=return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peaks_neg_tot,cy_hor_diff) + reading_order_type,x_starting,x_ending,y_type_2,y_diff_type_2,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y=return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peaks_neg_tot,cy_hor_diff) @@ -2240,9 +2240,18 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho ##y_lines_by_order.append(int(splitter_y_new[i])) ##x_start_by_order.append(0) - y_type_2.append(int(splitter_y_new[i])) - x_starting.append(x_starting[0]) - x_ending.append(x_ending[0]) + #y_type_2.append(int(splitter_y_new[i])) + #x_starting.append(x_starting[0]) + #x_ending.append(x_ending[0]) + + if len(new_main_sep_y)>0: + y_type_2.append(int(splitter_y_new[i])) + x_starting.append(0) + x_ending.append(len(peaks_neg_tot)-1) + else: + y_type_2.append(int(splitter_y_new[i])) + x_starting.append(x_starting[0]) + x_ending.append(x_ending[0]) y_type_2=np.array(y_type_2) From dbf91876e1e352fb9be7d76dad91630d4738cf21 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Tue, 13 Sep 2022 13:06:18 +0200 Subject: [PATCH 16/67] Adapt to new location of models --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 920f15b..13becb1 100644 --- a/Makefile +++ b/Makefile @@ -21,11 +21,11 @@ help: # Download and extract models to $(PWD)/models_eynollah models: models_eynollah -models_eynollah: models_eynollah.tar.gz - tar xf models_eynollah.tar.gz +models_eynollah: models_eynollah_renamed.tar.gz + tar xf models_eynollah_renamed.tar.gz models_eynollah.tar.gz: - wget 'https://qurator-data.de/eynollah/models_eynollah.tar.gz' + wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' # Install with pip install: From 07fe0d827dc4b525741b2c38e1dd68170db66363 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Tue, 13 Sep 2022 13:09:09 +0200 Subject: [PATCH 17/67] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 13becb1..1f5308e 100644 --- a/Makefile +++ 
b/Makefile @@ -21,7 +21,7 @@ help: # Download and extract models to $(PWD)/models_eynollah models: models_eynollah -models_eynollah: models_eynollah_renamed.tar.gz +models_eynollah: models_eynollah.tar.gz tar xf models_eynollah_renamed.tar.gz models_eynollah.tar.gz: From 583cdcee2cb20f9e8de38213112be07a5b2a4c15 Mon Sep 17 00:00:00 2001 From: vahid Date: Tue, 13 Sep 2022 15:07:00 +0200 Subject: [PATCH 18/67] new (hybrid cnn+transformer) textline model which can accelerate to extract contour textlines faster --- qurator/eynollah/cli.py | 10 +++- qurator/eynollah/eynollah.py | 103 ++++++++++++++++++++++++++++++----- qurator/eynollah/writer.py | 15 ++--- requirements.txt | 12 ++-- 4 files changed, 113 insertions(+), 27 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 6828cc5..ddf986e 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -76,7 +76,13 @@ from qurator.eynollah.eynollah import Eynollah "--curved-line/--no-curvedline", "-cl/-nocl", is_flag=True, - help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectabgle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.", + help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.", +) +@click.option( + "--textline_light/--no-textline_light", + "-tll/-notll", + is_flag=True, + help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectangle bounding box of textline with a faster method.", ) @click.option( "--full-layout/--no-full-layout", @@ -139,6 +145,7 @@ def main( enable_plotting, allow_enhancement, curved_line, + textline_light, full_layout, tables, input_binary, @@ -170,6 +177,7 @@ def main( enable_plotting=enable_plotting, allow_enhancement=allow_enhancement, curved_line=curved_line, + textline_light=textline_light, full_layout=full_layout, tables=tables, input_binary=input_binary, diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c125b1a..8de793c 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -30,6 +30,7 @@ from scipy.signal import find_peaks import matplotlib.pyplot as plt from scipy.ndimage import gaussian_filter1d from keras.backend import set_session +from tensorflow.keras import layers from .utils.contour import ( filter_contours_area_of_image, @@ -83,6 +84,60 @@ DPI_THRESHOLD = 298 MAX_SLOPE = 999 KERNEL = np.ones((5, 5), np.uint8) +projection_dim = 64 +patch_size = 1 +num_patches =21*21#14*14#28*28#14*14#28*28 + + +class Patches(layers.Layer): + def __init__(self, **kwargs): + super(Patches, self).__init__() + self.patch_size = patch_size + + def call(self, images): + batch_size = tf.shape(images)[0] + patches = tf.image.extract_patches( + images=images, + sizes=[1, self.patch_size, self.patch_size, 1], + strides=[1, self.patch_size, self.patch_size, 1], + rates=[1, 1, 1, 1], + padding="VALID", + ) + patch_dims = patches.shape[-1] + patches = tf.reshape(patches, [batch_size, -1, patch_dims]) + return patches + def get_config(self): + + config = super().get_config().copy() + config.update({ + 'patch_size': self.patch_size, + }) + return config + + +class PatchEncoder(layers.Layer): + def __init__(self, **kwargs): + super(PatchEncoder, self).__init__() + self.num_patches = num_patches + 
self.projection = layers.Dense(units=projection_dim) + self.position_embedding = layers.Embedding( + input_dim=num_patches, output_dim=projection_dim + ) + + def call(self, patch): + positions = tf.range(start=0, limit=self.num_patches, delta=1) + encoded = self.projection(patch) + self.position_embedding(positions) + return encoded + def get_config(self): + + config = super().get_config().copy() + config.update({ + 'num_patches': self.num_patches, + 'projection': self.projection, + 'position_embedding': self.position_embedding, + }) + return config + class Eynollah: def __init__( self, @@ -100,6 +155,7 @@ class Eynollah: enable_plotting=False, allow_enhancement=False, curved_line=False, + textline_light=False, full_layout=False, tables=False, input_binary=False, @@ -130,6 +186,7 @@ class Eynollah: self.enable_plotting = enable_plotting self.allow_enhancement = allow_enhancement self.curved_line = curved_line + self.textline_light = textline_light self.full_layout = full_layout self.tables = tables self.input_binary = input_binary @@ -151,6 +208,7 @@ class Eynollah: dir_out=self.dir_out, image_filename=self.image_filename, curved_line=self.curved_line, + textline_light = self.textline_light, pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models @@ -165,7 +223,10 @@ class Eynollah: self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425.h5" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425.h5" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314.h5" - self.model_textline_dir = dir_models + "/eynollah-textline_20210425.h5" + if self.textline_light: + self.model_textline_dir = dir_models + "/model_17.h5" + else: + self.model_textline_dir = dir_models + "/eynollah-textline_20210425.h5" self.model_tables = dir_models + "/eynollah-tables_20210319.h5" if dir_in and light_version: @@ -603,7 +664,10 @@ class Eynollah: gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) - model = load_model(model_dir, compile=False) + try: + model = load_model(model_dir, compile=False) + except: + model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model, session @@ -1368,12 +1432,17 @@ class Eynollah: # plt.imshow(mask_only_con_region) # plt.show() - all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) - mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - - - all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv]) + + if self.textline_light: + all_text_region_raw = np.copy(textline_mask_tot_ea) + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) + cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + else: + all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + 
boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) + mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] + all_text_region_raw[mask_only_con_region == 0] = 0 + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv]) textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) @@ -1481,8 +1550,10 @@ class Eynollah: if not self.dir_in: session_textline.close() - - return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] + if self.textline_light: + return (prediction_textline[:, :, 0]==1)*1, (prediction_textline_longshot_true_size[:, :, 0]==1)*1 + else: + return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): self.logger.debug('enter do_work_of_slopes') @@ -2562,6 +2633,8 @@ class Eynollah: scaler_h_textline = 1 # 1.2#1.2 scaler_w_textline = 1 # 0.9#1 textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline) + if self.textline_light: + textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) if not self.dir_in: K.clear_session() if self.plotter: @@ -2870,7 +2943,6 @@ class Eynollah: self.ls_imgs = [1] for img_name in self.ls_imgs: - print(img_name,'img_name') t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) @@ -2887,6 +2959,7 @@ class Eynollah: num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) #self.logger.info("run graphics %.1fs ", time.time() - t1t) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -3043,8 +3116,12 @@ class Eynollah: if not self.curved_line: if self.light_version: - slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + if self.textline_light: + slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, 
boxes_marginals, slope_deskew) + else: + slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: slopes, all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index d36d3ab..d5704f6 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -22,12 +22,13 @@ import numpy as np class EynollahXmlWriter(): - def __init__(self, *, dir_out, image_filename, curved_line, pcgts=None): + def __init__(self, *, dir_out, image_filename, curved_line,textline_light, pcgts=None): self.logger = getLogger('eynollah.writer') self.counter = EynollahIdCounter() self.dir_out = dir_out self.image_filename = image_filename self.curved_line = curved_line + self.textline_light = textline_light self.pcgts = pcgts self.scale_x = None # XXX set outside __init__ self.scale_y = None # XXX set outside __init__ @@ -60,7 +61,7 @@ class EynollahXmlWriter(): marginal_region.add_TextLine(textline) points_co = '' for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])): - if not self.curved_line: + if not (self.curved_line or self.textline_light): if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: textline_x_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) textline_y_coord = max(0, int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) @@ -70,7 +71,7 @@ class EynollahXmlWriter(): points_co += str(textline_x_coord) points_co += ',' points_co += str(textline_y_coord) - if self.curved_line and np.abs(slopes_marginals[marginal_idx]) <= 45: + if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45: if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x)) points_co += ',' @@ -80,7 +81,7 @@ class EynollahXmlWriter(): points_co += ',' points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y)) - elif self.curved_line and np.abs(slopes_marginals[marginal_idx]) > 45: + elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45: if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2: points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + 
all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) points_co += ',' @@ -101,7 +102,7 @@ class EynollahXmlWriter(): region_bboxes = all_box_coord[region_idx] points_co = '' for idx_contour_textline, contour_textline in enumerate(all_found_texline_polygons[region_idx][j]): - if not self.curved_line: + if not (self.curved_line or self.textline_light): if len(contour_textline) == 2: textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) @@ -112,7 +113,7 @@ class EynollahXmlWriter(): points_co += ',' points_co += str(textline_y_coord) - if self.curved_line and np.abs(slopes[region_idx]) <= 45: + if (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) <= 45: if len(contour_textline) == 2: points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) points_co += ',' @@ -121,7 +122,7 @@ class EynollahXmlWriter(): points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) points_co += ',' points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) - elif self.curved_line and np.abs(slopes[region_idx]) > 45: + elif (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) > 45: if len(contour_textline)==2: points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x)) points_co += ',' diff --git a/requirements.txt b/requirements.txt index 8520780..54bb55e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # ocrd includes opencv, numpy, shapely, click -ocrd >= 2.23.3 -keras >= 2.3.1, < 2.4 -scikit-learn >= 0.23.2 -tensorflow-gpu >= 1.15, < 2 -imutils >= 0.5.3 +ocrd +keras == 2.6.0 +scikit-learn +tensorflow-gpu == 2.6.0 +imutils matplotlib -setuptools >= 50 +setuptools From 38bf0d8740d33e8d4ffdba9122b93924783e2cdf Mon Sep 17 00:00:00 2001 From: vahid Date: Tue, 13 Sep 2022 16:08:08 +0200 Subject: [PATCH 19/67] solving issue by loading model by directory as input --- qurator/eynollah/eynollah.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8de793c..0034b5f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -293,6 +293,7 @@ class Eynollah: dir_out=self.dir_out, image_filename=self.image_filename, curved_line=self.curved_line, + textline_light = self.textline_light, pcgts=self.pcgts) def imread(self, grayscale=False, uint8=True): key = 'img' @@ -2926,8 +2927,11 @@ class Eynollah: return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables def our_load_model(self, model_file): - - model = load_model(model_file, compile=False) + + try: + model = load_model(model_file, compile=False) + except: + model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model From 000402f0dc880d7460b61345b217595c7e3e4083 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Tue, 13 Sep 2022 16:40:44 +0200 Subject: [PATCH 20/67] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7438427..cdc724b 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,10 @@ Alternatively, you can also use `make` with 
these targets: `make install-dev` for editable installation +The current version of Eynollah runs on Python >=3.6 with Tensorflow >=2.4. + +In order to use a GPU for inference, the CUDA toolkit version 10.x needs to be installed. + ### Models In order to run this tool you need trained models. You can download our pretrained models from [qurator-data.de](https://qurator-data.de/eynollah/). From b75d8afb1d467a286d67c5e97a3d154b28850df0 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:19:19 +0200 Subject: [PATCH 21/67] Update README.md --- README.md | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index cdc724b..196c188 100644 --- a/README.md +++ b/README.md @@ -33,28 +33,29 @@ In case you want to train your own model to use with Eynollah, have a look at [s The command-line interface can be called like this: ```sh -eynollah \ --i \ --o \ --m \ --fl \ --ae \ --as \ --cl \ --si \ --sd \ --sa \ --tab \ --ib \ --ho \ --sl \ --ep --light --di +eynollah -i -o -m [OPTIONS] +``` + +Additionally, the following optional parameters can be used to further configure the processing: +```sh +-fl: the tool will perform full layout analysis including detection of marginalia and drop capitals +-ae: the tool will resize and enhance the image. The rescaled and enhanced image is saved to the output directory +-as: the tool will check whether the document needs rescaling or not +-cl: the tool will extract contours of curved textlines instead of rectangle bounding boxes +-si : when a directory is given here, the tool will save image regions detected in documents to this directory +-sd : when a directory is given, deskewed image will be saved to this directory +-sa : when a directory is given, plots of layout detection are saved to this directory +-tab: the tool will try to detect tables +-ib: the tool will binarize the input image +-ho: the tool will ignore headers in reading order detection +-sl : when a directory is given, plots of layout detection are saved to this directory +-ep: the tool will save a plot. This should be used alongside with `-sl`, `-sd`, `-sa`, `-si` or `-ae` options +-light: the tool will apply a faster method for main region detection and deskewing +-di : the tool will process all images in the directory in batch mode ``` -The tool performs better with RGB images than greyscale/binarized images. +The tool performs better with RGB images as input than with greyscale or binarized images. ## Documentation @@ -126,7 +127,9 @@ Some heuristic methods are also employed to further improve the model prediction
click to expand/collapse
- + +The tool makes use of a combination of several models. For model training, please see [Training](https://github.com/qurator-spk/eynollah/blob/eynollah_light/README.md#training). + #### Enhancement model: The image enhancement model is again an image-to-image model, trained on document images with low quality and GT of corresponding images with higher quality. For training the image enhancement model, a total of 1127 document images underwent 11 different downscaling processes and consequently 11 different qualities for each image were derived. The resulting images were cropped into patches of 672*672 pixels. Adam is used as an optimizer and the learning rate is 1e-4. Scaling is the only augmentation applied for training. The model is trained with a batch size of 2 and for 5 epochs. From ffc7f8290648cb87796bd87063dd173c649645eb Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Tue, 13 Sep 2022 21:48:21 +0200 Subject: [PATCH 22/67] Update README.md --- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 196c188..9374962 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Alternatively, you can also use `make` with these targets: `make install-dev` for editable installation -The current version of Eynollah runs on Python >=3.6 with Tensorflow >=2.4. +The current version of Eynollah runs on Python `>=3.6` with Tensorflow `>=2.4`. In order to use a GPU for inference, the CUDA toolkit version 10.x needs to be installed. @@ -36,23 +36,23 @@ The command-line interface can be called like this: eynollah -i -o -m [OPTIONS] ``` -Additionally, the following optional parameters can be used to further configure the processing: +The following options can be used to further configure the processing: ```sh --fl: the tool will perform full layout analysis including detection of marginalia and drop capitals --ae: the tool will resize and enhance the image. The rescaled and enhanced image is saved to the output directory --as: the tool will check whether the document needs rescaling or not --cl: the tool will extract contours of curved textlines instead of rectangle bounding boxes --si : when a directory is given here, the tool will save image regions detected in documents to this directory --sd : when a directory is given, deskewed image will be saved to this directory --sa : when a directory is given, plots of layout detection are saved to this directory --tab: the tool will try to detect tables --ib: the tool will binarize the input image --ho: the tool will ignore headers in reading order detection --sl : when a directory is given, plots of layout detection are saved to this directory --ep: the tool will save a plot. 
This should be used alongside with `-sl`, `-sd`, `-sa`, `-si` or `-ae` options --light: the tool will apply a faster method for main region detection and deskewing --di : the tool will process all images in the directory in batch mode +-fl: perform full layout analysis including detection of marginalia and drop capitals +-ae: allow resizing and enhancing the input image, a rescaled and enhanced image is saved to the output directory +-as: allow scaling - check whether the input image needs rescaling or not +-cl: extract contours of curved textlines instead of rectangle bounding boxes +-si : save image regions detected in documents to this directory +-sd : save deskewed image to this directory +-sa : save plot of layout detection to this directory +-tab: try to detect tables +-ib: allow binarization of the input image +-ho: ignore headers in reading order detection +-sl : save plots of layout detection to this directory +-ep: save a plot. This should be used alongside with `-sl`, `-sd`, `-sa`, `-si` or `-ae` options +-light: apply a faster but simpler method for main region detection and deskewing +-di : process all images in a directory in batch mode ``` The tool performs better with RGB images as input than with greyscale or binarized images. From 5ca857018b9e4f4cc0dd64c60d17254da091e6c7 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Wed, 14 Sep 2022 15:26:36 +0200 Subject: [PATCH 23/67] Update README.md --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 9374962..e307624 100644 --- a/README.md +++ b/README.md @@ -38,21 +38,21 @@ eynollah -i -o -m : save image regions detected in documents to this directory --sd : save deskewed image to this directory --sa : save plot of layout detection to this directory --tab: try to detect tables --ib: allow binarization of the input image --ho: ignore headers in reading order detection --sl : save plots of layout detection to this directory --ep: save a plot. This should be used alongside with `-sl`, `-sd`, `-sa`, `-si` or `-ae` options --light: apply a faster but simpler method for main region detection and deskewing --di : process all images in a directory in batch mode +``` +-fl perform full layout analysis including detection of marginalia and drop capitals +-tab try to detect tables +-light apply a faster but simpler method for main region detection and deskewing +-ae allow resizing and enhancing the input image, the enhanced image is saved to the output directory +-as allow scaling - automatically check whether the input image needs scaling or not +-ib allow binarization of the input image +-ho ignore headers for reading order prediction +-cl extract contours of curved textlines instead of rectangle bounding boxes +-ep enables plotting. This MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae` options +-di process all images in a directory in batch mode +-si save image regions detected in documents to this directory +-sd save deskewed image to this directory +-sl save layout prediction as plot to this directory +-sa save all outputs (plot, enhanced or binary image and layout prediction) to this directory ``` The tool performs better with RGB images as input than with greyscale or binarized images. 
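As a concrete illustration of the option table above, a typical invocation of the light pipeline with full layout analysis and plotting might look like the following sketch (all paths are hypothetical placeholders):

```sh
# hypothetical paths; -m must point to the unpacked models_eynollah directory
eynollah -i ./document.png -o ./out -m ./models_eynollah -light -fl -tab -ep -sl ./plots
```

Note that `-ep` is combined with `-sl` here, as required by the option descriptions above.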
From 30ef006dfd1e525def5622575ca32f5b36123f52 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Wed, 14 Sep 2022 18:28:54 +0200 Subject: [PATCH 24/67] Update README.md Clarify CLI options --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e307624..bcc667b 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ eynollah -i -o -m Date: Tue, 28 Mar 2023 23:14:29 +0200 Subject: [PATCH 25/67] Update requirements.txt --- requirements.txt | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 54bb55e..0180d01 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ # ocrd includes opencv, numpy, shapely, click -ocrd -keras == 2.6.0 -scikit-learn -tensorflow-gpu == 2.6.0 -imutils +ocrd >= 2.23.3 +scikit-learn >= 0.23.2 +tensorflow >= 2.4.0 +imutils >= 0.5.3 matplotlib -setuptools +setuptools >= 50 From 4276417938e0fb9d83be1fd8cf192bd85b703c8a Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 28 Mar 2023 23:32:42 +0200 Subject: [PATCH 26/67] Update README.md --- README.md | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index bcc667b..1cadf5d 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ## Installation `pip install .` or -`pip install . -e` for editable installation +`pip install -e .` for editable installation Alternatively, you can also use `make` with these targets: @@ -123,40 +123,12 @@ Some heuristic methods are also employed to further improve the model prediction
-### Model description - -
- click to expand/collapse
- -The tool makes use of a combination of several models. For model training, please see [Training](https://github.com/qurator-spk/eynollah/blob/eynollah_light/README.md#training). - -#### Enhancement model: -The image enhancement model is again an image-to-image model, trained on document images with low quality and GT of corresponding images with higher quality. For training the image enhancement model, a total of 1127 document images underwent 11 different downscaling processes and consequently 11 different qualities for each image were derived. The resulting images were cropped into patches of 672*672 pixels. Adam is used as an optimizer and the learning rate is 1e-4. Scaling is the only augmentation applied for training. The model is trained with a batch size of 2 and for 5 epochs. - -#### Classifier model: -In order to obtain high quality results, it is beneficial to scale the document image to the same scale of the images in the training dataset that the models were trained on. The classifier model predicts the number of columns in a document by creating a training set for that purpose with manual classification of all documents into six classes with either one, two, three, four, five, or six and more columns respectively. Classifier model is a ResNet50+2 dense layers on top. The input size of model is 448*448 and Adam is used as an optimizer and the learning rate is 1e-4. Model is trained for 300 epochs. - -#### Page extractor model: -This a deep learning model which helps to crop the page borders by using a pixel-wise segmentation method. In case of page extraction it is necessary to train the model on the entire (document) image, i.e. full images are resized to the input size of the model (no patches). For training, the model is fed with entire images from the 2820 samples of the extended training set. The input size of the the page extraction model is 448*448 pixels. Adam is used as an optimizer and the learning rate is 1e-6. The model is trained with a batch size of 4 and for 30 epochs. - -#### Early layout model: -The early layout detection model detects only the main and recursive regions in a document like background, text regions, separators and images. In the case of early layout segmentation, we used 381 pages to train the model. The model is fed with patches of size 448*672 pixels. Adam is used as an optimizer and the learning rate is 1e-4. Two models were trained, one with scale augmentation and another one without any augmentation. Both models were trained for 12 epochs and with a batch size of 3. Categorical cross entropy is used as a loss function. - -#### Full layout model: -By full layout detection we have added two more elements of a document structure, drop capitals and headings, onto early layout elements. For the secondary layout segmentation we have trained two models. One is trained with 355 pages containing 3 or more columns and in patches with a size of 896*896 pixels. The other model is trained on 634 pages that have only one column. The second model is fed with the entire image with input size -of 896 * 896 pixels (not in patches). Adam is used as an optimizer and the learning rate is 1e-4. Then both models are trained for 8 epochs with a batch size of 1. Soft dice is used as the loss function. - -#### Text line segmentation model: -For text line segmentation, 342 pages were used for training. The model is trained in patches with the size of 448*672. Adam is used as an optimizer and the learning rate is 1e-4. 
The training set is augmented with scaling and rotation. The model is trained only for 1 epoch with a batch size of 3. Soft dice is again used as the loss function. - -
- ### How to use
click to expand/collapse
-First, this model makes use of up to 9 trained models which are responsible for different operations like size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection.That does not mean that all 9 models are always required for every document. Based on the document characteristics and parameters specified, different scenarios can be applied. +Eynollah makes use of up to 9 trained models which are responsible for different operations like size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection. That does not mean that all 9 models are always required for every document. Based on the document characteristics and parameters specified, different scenarios can be applied. * If none of the parameters is set to `true`, the tool will perform a layout detection of main regions (background, text, images, separators and marginals). An advantage of this tool is that it tries to extract main text regions separately as much as possible. @@ -201,7 +173,7 @@ would still use the original (RGB) image despite any binarization that may have #### Eynollah "light" - Eynollah light has used a faster method to predict and extract early layout. On other hand with light version deskewing is not applied for any text region and in return it is done for the whole document once. The other option that users have with light version is that instead of image name a folder of images can be given as input and in this case all models will be loaded and then processing for all images will be implemented. This step accelerates process of document analysis. + Eynollah light uses a faster method to predict and extract the early layout. With the light option enabled, deskewing is not applied to each text region individually; instead it is applied once to the whole document.
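The same scenario selection is available from Python. Below is a minimal sketch, assuming the constructor parameters introduced in the hunks of this series (`light_version`, `textline_light`, `full_layout`); the paths are placeholders and the exact call is illustrative, not canonical:

```python
# illustrative sketch; parameter names follow the Eynollah.__init__ diffs in this series
from qurator.eynollah.eynollah import Eynollah

eynollah = Eynollah(
    dir_models="./models_eynollah",   # unpacked pretrained models
    image_filename="./document.png",  # hypothetical input page image
    dir_out="./out",                  # PAGE-XML output directory
    light_version=True,               # faster early-layout prediction, one global deskew
    textline_light=True,              # lighter textline model (cf. model_17.h5 above)
    full_layout=False,                # detect main regions only
)
eynollah.run()  # runs the pipeline and writes the PAGE-XML result
```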
From f37d324812bc90315e1a7b1002144bf877c61f82 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 28 Mar 2023 23:36:50 +0200 Subject: [PATCH 27/67] Use renamed models in SavedModel format --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 1f5308e..c9e7a11 100644 --- a/Makefile +++ b/Makefile @@ -22,10 +22,10 @@ help: models: models_eynollah models_eynollah: models_eynollah.tar.gz - tar xf models_eynollah_renamed.tar.gz + tar xf 2022-04-05.SavedModel.tar.gz models_eynollah.tar.gz: - wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' + wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' # Install with pip install: From 27834ce33de6eef2b81ef458dc3fd7fb924271dd Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 28 Mar 2023 23:45:38 +0200 Subject: [PATCH 28/67] update CI --- .circleci/config.yml | 27 ++++++++++++++++++++++----- .github/workflows/test-eynollah.yml | 4 ++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 72b2c5a..8cf026c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,9 +2,26 @@ version: 2 jobs: - build-python36: + build-python37: docker: - - image: python:3.6 + - image: python:3.7 + steps: + - checkout + - restore_cache: + keys: + - model-cache + - run: make models + - save_cache: + key: model-cache + paths: + models_eynollah.tar.gz + models_eynollah + - run: make install + - run: make smoke-test + + build-python38: + docker: + - image: python:3.8 steps: - checkout - restore_cache: @@ -23,6 +40,6 @@ workflows: version: 2 build: jobs: - - build-python36 - #- build-python37 - #- build-python38 # no tensorflow for python 3.8 + #- build-python36 + - build-python37 + - build-python38 \ No newline at end of file diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 1afd2a6..de742f1 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.6'] # '3.7' + python-version: ['3.7'] # '3.8' steps: - uses: actions/checkout@v2 @@ -33,4 +33,4 @@ jobs: pip install . 
pip install -r requirements-test.txt - name: Test with pytest - run: make test + run: make test \ No newline at end of file From 4642ccb36d435ac0d13792d3127b7301ddcec5b9 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 28 Mar 2023 23:54:50 +0200 Subject: [PATCH 29/67] Update config.yml --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8cf026c..23eb724 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -42,4 +42,4 @@ workflows: jobs: #- build-python36 - build-python37 - - build-python38 \ No newline at end of file + - build-python38 From 2c13f1bddc8a9495e8a1ca138f7c62b6651d7292 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 29 Mar 2023 00:02:16 +0200 Subject: [PATCH 30/67] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1cadf5d..02bbcb7 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ Alternatively, you can also use `make` with these targets: `make install` or -`make install-dev` for editable installation +`make install-dev` for editable installation -The current version of Eynollah runs on Python `>=3.6` with Tensorflow `>=2.4`. +The current version of Eynollah runs on Python `>=3.6` with Tensorflow `>=2.4`. In order to use a GPU for inference, the CUDA toolkit version 10.x needs to be installed. From d21cc42d875930c779aaa96126e123732712ce6e Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 29 Mar 2023 00:21:02 +0200 Subject: [PATCH 31/67] Update README.md remove Python 3.6 from supported versions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 02bbcb7..da11b82 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Alternatively, you can also use `make` with these targets: `make install-dev` for editable installation -The current version of Eynollah runs on Python `>=3.6` with Tensorflow `>=2.4`. +The current version of Eynollah runs on Python `>=3.7` with Tensorflow `>=2.4`. In order to use a GPU for inference, the CUDA toolkit version 10.x needs to be installed. 
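Given how often the supported versions shift across these commits, a quick runtime check can catch mismatches early; the following sketch mirrors the README requirements as of this commit:

```python
# sanity check reflecting the stated requirements (Python >= 3.7, TensorFlow >= 2.4)
import sys
import tensorflow as tf

assert sys.version_info >= (3, 7), "Eynollah requires Python >= 3.7"
assert tuple(int(v) for v in tf.__version__.split(".")[:2]) >= (2, 4), \
    "Eynollah requires TensorFlow >= 2.4"
# GPU inference additionally requires a CUDA toolkit matching this TensorFlow build
print("GPU devices:", tf.config.list_physical_devices("GPU"))
```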
From 58ca226f2db847e3a837afc8aee28f8a217c82c1 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 29 Mar 2023 00:54:51 +0200 Subject: [PATCH 32/67] apply some fixes from main --- qurator/eynollah/eynollah.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 0034b5f..be490fe 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -20,10 +20,10 @@ import numpy as np os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" stderr = sys.stderr sys.stderr = open(os.devnull, "w") -from keras import backend as K -from keras.models import load_model -sys.stderr = stderr import tensorflow as tf +from tensorflow.python.keras import backend as K +from tensorflow.keras.models load_model +sys.stderr = stderr tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") from scipy.signal import find_peaks @@ -699,7 +699,7 @@ class Eynollah: if img.shape[1] < img_width_model: img = resize_image(img, img.shape[0], img_width_model) - self.logger.info("Image dimensions: %sx%s", img_height_model, img_width_model) + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) margin = int(marginal_of_patch_percent * img_height_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin From 3d54719c87152ae94e4cc2c5572bc554825fae57 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 29 Mar 2023 01:01:19 +0200 Subject: [PATCH 33/67] fix import --- qurator/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index be490fe..6500c2e 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -22,7 +22,7 @@ stderr = sys.stderr sys.stderr = open(os.devnull, "w") import tensorflow as tf from tensorflow.python.keras import backend as K -from tensorflow.keras.models load_model +from tensorflow.keras.models import load_model sys.stderr = stderr tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") From 73057d57d1fcfc3fab6495bf46570365f7988ca4 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 11 Feb 2023 11:58:40 +0000 Subject: [PATCH 35/67] silentium! 
--- qurator/eynollah/eynollah.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 6500c2e..6f776ae 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -356,7 +356,8 @@ class Eynollah: index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model_enhancement.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + label_p_pred = model_enhancement.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + verbose=0) seg = label_p_pred[0, :, :, :] seg = seg * 255 @@ -491,10 +492,11 @@ class Eynollah: img_in[0, :, :, 0] = img_1ch[:, :] img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] + if not self.dir_in: - label_p_pred = model_num_classifier.predict(img_in) + label_p_pred = model_num_classifier.predict(img_in, verbose=0) else: - label_p_pred = self.model_classifier.predict(img_in) + label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 self.logger.info("Found %s columns (%s)", num_col, label_p_pred) @@ -572,10 +574,11 @@ class Eynollah: if self.dir_in: - label_p_pred = self.model_classifier.predict(img_in) + label_p_pred = self.model_classifier.predict(img_in, verbose=0) else: - label_p_pred = model_num_classifier.predict(img_in) + label_p_pred = model_num_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 + self.logger.info("Found %s columns (%s)", num_col, label_p_pred) if not self.dir_in: session_col_classifier.close() @@ -684,7 +687,8 @@ class Eynollah: img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), + verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) @@ -736,7 +740,8 @@ class Eynollah: index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) From 1ac0a7e06f968db8adb0e30e2cfebe5e8e8ce7c5 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 10 Feb 2023 00:56:52 +0000 Subject: [PATCH 36/67] try loading as TF SavedModel instead of HDF5 --- qurator/eynollah/eynollah.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 6f776ae..406964a 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -668,10 +668,15 @@ class Eynollah: gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) - try: - model = load_model(model_dir, compile=False) - except: - model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + + # try: + # model = load_model(model_dir, 
compile=False) + # except: + # model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + if model_dir.endswith('.h5') and Path(model_dir[:-3]).exists(): + # prefer SavedModel over HDF5 format if it exists + model_dir = model_dir[:-3] + model = load_model(model_dir, compile=False) return model, session From 9849541061f417a859e03831dedee3f087d0b9d1 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 30 Mar 2023 22:22:36 +0200 Subject: [PATCH 37/67] Update Makefile test hdf5 models --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c9e7a11..90b9891 100644 --- a/Makefile +++ b/Makefile @@ -22,10 +22,12 @@ help: models: models_eynollah models_eynollah: models_eynollah.tar.gz - tar xf 2022-04-05.SavedModel.tar.gz + tar xf tar xf models_eynollah_renamed.tar.gz + # tar xf 2022-04-05.SavedModel.tar.gz models_eynollah.tar.gz: - wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' + # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' # Install with pip install: From d4dd532212ce44c26421e79a6ff079ce4c71a5b2 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 30 Mar 2023 22:37:15 +0200 Subject: [PATCH 38/67] Update Makefile caj --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 90b9891..e5227b2 100644 --- a/Makefile +++ b/Makefile @@ -22,11 +22,13 @@ help: models: models_eynollah models_eynollah: models_eynollah.tar.gz - tar xf tar xf models_eynollah_renamed.tar.gz + tar xf models_eynollah.tar.gz + # tar xf models_eynollah_renamed.tar.gz # tar xf 2022-04-05.SavedModel.tar.gz models_eynollah.tar.gz: - wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' + wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' + # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' # Install with pip From fb6d97091bc498502a841c4f445365401d93918e Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 16 Feb 2023 14:40:02 +0000 Subject: [PATCH 39/67] OCR-D wrapper: expose tables param --- qurator/eynollah/ocrd-tool.json | 5 +++++ qurator/eynollah/processor.py | 1 + 2 files changed, 6 insertions(+) diff --git a/qurator/eynollah/ocrd-tool.json b/qurator/eynollah/ocrd-tool.json index 220f2ea..1291979 100644 --- a/qurator/eynollah/ocrd-tool.json +++ b/qurator/eynollah/ocrd-tool.json @@ -29,6 +29,11 @@ "default": true, "description": "Try to detect all element subtypes, including drop-caps and headings" }, + "tables": { + "type": "boolean", + "default": false, + "description": "Try to detect table regions" + }, "curved_line": { "type": "boolean", "default": false, diff --git a/qurator/eynollah/processor.py b/qurator/eynollah/processor.py index 41b12ae..ccec456 100644 --- a/qurator/eynollah/processor.py +++ b/qurator/eynollah/processor.py @@ -50,6 +50,7 @@ class EynollahProcessor(Processor): 'full_layout': self.parameter['full_layout'], 'allow_scaling': self.parameter['allow_scaling'], 'headers_off': self.parameter['headers_off'], + 'tables': self.parameter['tables'], 'override_dpi': self.parameter['dpi'], 'logger': LOG, 'pcgts': pcgts, From a9728bb899315ae2672b1a0be3477dfce5fb1a10 Mon Sep 17 00:00:00 2001 From: cneud 
<952378+cneud@users.noreply.github.com> Date: Thu, 30 Mar 2023 23:44:05 +0200 Subject: [PATCH 40/67] Update eynollah.py predict quietly please --- qurator/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 406964a..f210fcd 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -827,7 +827,7 @@ class Eynollah: if img.shape[1] < img_width_model: img = resize_image(img, img.shape[0], img_width_model) - self.logger.info("Image dimensions: %sx%s", img_height_model, img_width_model) + self.logger.debug("Patch size: %sx%s", img_height_model, img_width_model) margin = int(marginal_of_patch_percent * img_height_model) width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin From 817e5a6af920ec10c9816052df08148b6f8603f8 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 31 Mar 2023 01:32:10 +0200 Subject: [PATCH 41/67] update docstring --- qurator/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index f210fcd..2bb09a1 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3,7 +3,7 @@ # pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods, # pylint: disable=consider-using-enumerate """ -tool to extract table form data from alto xml data +document layout analysis (segmentation) with output in PAGE-XML """ import math From 31be7892a069d582c3bf20f2f9eafd0e918cf89b Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 31 Mar 2023 02:19:07 +0200 Subject: [PATCH 42/67] Makefile hack to rename model dir --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e5227b2..b85a526 100644 --- a/Makefile +++ b/Makefile @@ -21,14 +21,14 @@ help: # Download and extract models to $(PWD)/models_eynollah models: models_eynollah -models_eynollah: models_eynollah.tar.gz - tar xf models_eynollah.tar.gz +models_eynollah: models_eynollah_renamed.tar.gz + tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' # tar xf models_eynollah_renamed.tar.gz # tar xf 2022-04-05.SavedModel.tar.gz models_eynollah.tar.gz: - wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' - # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' + # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' + wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' # Install with pip From fd4c0ed4e86e12a6834c0e49811be93b3040e599 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 31 Mar 2023 02:21:36 +0200 Subject: [PATCH 43/67] Update Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b85a526..8706995 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ help: # Download and extract models to $(PWD)/models_eynollah models: models_eynollah -models_eynollah: models_eynollah_renamed.tar.gz +models_eynollah: models_eynollah.tar.gz tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' # tar xf models_eynollah_renamed.tar.gz # tar xf 2022-04-05.SavedModel.tar.gz From 
aecc2ea543225578dc8eec85e26875a0903cdc44 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Fri, 31 Mar 2023 03:18:18 +0200 Subject: [PATCH 44/67] Update README.md added some badges --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index da11b82..29ed56f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ # Eynollah +[![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=svg)](https://circleci.com/gh/qurator-spk/eynollah) +[![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/) +[![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://github.com/qurator-spk/eynollah/blob/main/LICENSE) > Perform document layout analysis (segmentation) from image data and return the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML). ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) From 0279ebfe1322756b73a00164b53b4258d59c0297 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Fri, 31 Mar 2023 03:19:44 +0200 Subject: [PATCH 45/67] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 29ed56f..0d84993 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Eynollah [![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=svg)](https://circleci.com/gh/qurator-spk/eynollah) [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/) -[![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://github.com/qurator-spk/eynollah/blob/main/LICENSE) +[![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://opensource.org/license/apache-2-0/) > Perform document layout analysis (segmentation) from image data and return the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML). ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) From 22a8e93031b806044d0bcac1d30195b328417f27 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Wed, 5 Apr 2023 10:40:18 +0200 Subject: [PATCH 46/67] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0d84993..e400d51 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ # Eynollah +> Perform document layout analysis (segmentation) from image data and return the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) + [![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=svg)](https://circleci.com/gh/qurator-spk/eynollah) [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/) [![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://opensource.org/license/apache-2-0/) -> Perform document layout analysis (segmentation) from image data and return the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML). 
![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) From d3735b12f49b4f5b71ba576c392cd9ffcc74b408 Mon Sep 17 00:00:00 2001 From: vahid Date: Tue, 11 Apr 2023 13:12:20 +0200 Subject: [PATCH 47/67] pushing commits 2d9ccac and 7345f6b into eynollah_light --- qurator/eynollah/eynollah.py | 204 +++++++---------------------------- 1 file changed, 38 insertions(+), 166 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2bb09a1..c9e6674 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -229,6 +229,8 @@ class Eynollah: self.model_textline_dir = dir_models + "/eynollah-textline_20210425.h5" self.model_tables = dir_models + "/eynollah-tables_20210319.h5" + self.models = {} + if dir_in and light_version: config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True @@ -391,10 +393,6 @@ class Eynollah: prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg prediction_true = prediction_true.astype(int) - session_enhancement.close() - del model_enhancement - del session_enhancement - gc.collect() return prediction_true @@ -500,13 +498,6 @@ class Eynollah: num_col = np.argmax(label_p_pred[0]) + 1 self.logger.info("Found %s columns (%s)", num_col, label_p_pred) - if not self.dir_in: - session_col_classifier.close() - - del model_num_classifier - del session_col_classifier - K.clear_session() - gc.collect() @@ -537,12 +528,6 @@ class Eynollah: prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - if not self.dir_in: - session_bin.close() - del model_bin - del session_bin - gc.collect() - prediction_bin = prediction_bin.astype(np.uint8) img= np.copy(prediction_bin) img_bin = np.copy(prediction_bin) @@ -579,10 +564,7 @@ class Eynollah: label_p_pred = model_num_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 - self.logger.info("Found %s columns (%s)", num_col, label_p_pred) - if not self.dir_in: - session_col_classifier.close() - K.clear_session() + self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) if dpi < DPI_THRESHOLD: img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) @@ -595,8 +577,6 @@ class Eynollah: num_column_is_classified = True image_res = np.copy(img) is_image_enhanced = False - if not self.dir_in: - session_col_classifier.close() self.logger.debug("exit resize_and_enhance_image_with_column_classifier") @@ -665,9 +645,14 @@ class Eynollah: def start_new_session_and_model(self, model_dir): self.logger.debug("enter start_new_session_and_model (model_dir=%s)", model_dir) - gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) + #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) - session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) + #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) + physical_devices = tf.config.list_physical_devices('GPU') + try: + tf.config.experimental.set_memory_growth(physical_devices[0], True) + except: + self.logger.warning("no GPU device available") # try: # model = load_model(model_dir, compile=False) @@ -676,9 +661,13 @@ class Eynollah: if model_dir.endswith('.h5') and Path(model_dir[:-3]).exists(): # prefer SavedModel over HDF5 format if it exists model_dir = 
model_dir[:-3] - model = load_model(model_dir, compile=False) + if model_dir in self.models: + model = self.models[model_dir] + else: + model = load_model(model_dir, compile=False) + self.models[model_dir] = model - return model, session + return model, None def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): self.logger.debug("enter do_prediction") @@ -797,8 +786,8 @@ class Eynollah: prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color prediction_true = prediction_true.astype(np.uint8) - del model - gc.collect() + #del model + #gc.collect() return prediction_true def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_percent=0.1): self.logger.debug("enter do_prediction") @@ -963,17 +952,19 @@ class Eynollah: prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color prediction_true = prediction_true.astype(np.uint8) - del model - gc.collect() + ##del model + ##gc.collect() return prediction_true def extract_page(self): self.logger.debug("enter extract_page") cont_page = [] if not self.ignore_page_extraction: + img = cv2.GaussianBlur(self.image, (5, 5), 0) + if not self.dir_in: model_page, session_page = self.start_new_session_and_model(self.model_page_dir) - img = cv2.GaussianBlur(self.image, (5, 5), 0) + if not self.dir_in: img_page_prediction = self.do_prediction(False, img, model_page) else: @@ -1003,12 +994,7 @@ class Eynollah: box = [0, 0, img.shape[1], img.shape[0]] croped_page, page_coord = crop_image_inside_box(box, self.image) cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - if not self.dir_in: - session_page.close() - del model_page - del session_page - K.clear_session() - gc.collect() + self.logger.debug("exit extract_page") else: box = [0, 0, self.image.shape[1], self.image.shape[0]] @@ -1046,14 +1032,6 @@ class Eynollah: box = [0, 0, img.shape[1], img.shape[0]] croped_page, page_coord = crop_image_inside_box(box, img) - if not self.dir_in: - session_page.close() - del model_page - del session_page - K.clear_session() - - gc.collect() - self.logger.debug("exit early_page_for_num_of_column_classification") else: img = self.imread() @@ -1156,12 +1134,6 @@ class Eynollah: prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) - if not self.dir_in: - session_region.close() - del model_region - del session_region - gc.collect() - self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 @@ -1558,8 +1530,6 @@ class Eynollah: prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) - if not self.dir_in: - session_textline.close() if self.textline_light: return (prediction_textline[:, :, 0]==1)*1, (prediction_textline_longshot_true_size[:, :, 0]==1)*1 @@ -1631,8 +1601,6 @@ class Eynollah: else: img_w_new = 4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - gc.collect() - ##img_resized = resize_image(img_bin,img_height_h, img_width_h ) img_resized = resize_image(img,img_h_new, img_w_new ) if not self.dir_in: @@ -1645,11 +1613,6 @@ class Eynollah: prediction_bin = prediction_bin*255 prediction_bin 
=np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - if not self.dir_in: - session_bin.close() - del model_bin - del session_bin - gc.collect() prediction_bin = prediction_bin.astype(np.uint16) #img= np.copy(prediction_bin) @@ -1695,9 +1658,6 @@ class Eynollah: text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - #erosion_hurts = True - if not self.dir_in: - K.clear_session() return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): @@ -1742,16 +1702,9 @@ class Eynollah: prediction_regions_org = self.do_prediction(True, img, model_region) prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - ##plt.imshow(prediction_regions_org[:,:,0]) - ##plt.show() prediction_regions_org=prediction_regions_org[:,:,0] prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 - if not self.dir_in: - session_region.close() - del model_region - del session_region - gc.collect() if not self.dir_in: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) @@ -1763,11 +1716,6 @@ class Eynollah: prediction_regions_org2 = self.do_prediction(True, img, model_region, 0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) - if not self.dir_in: - session_region.close() - del model_region - del session_region - gc.collect() mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) mask_lines2 = (prediction_regions_org2[:,:,0] == 3) @@ -1788,8 +1736,6 @@ class Eynollah: mask_lines_only=(prediction_regions_org[:,:]==3)*1 prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2) - #plt.imshow(text_region2_1st_channel) - #plt.show() prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) @@ -1811,11 +1757,6 @@ class Eynollah: prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - if not self.dir_in: - session_bin.close() - del model_bin - del session_bin - gc.collect() if not self.dir_in: @@ -1834,11 +1775,6 @@ class Eynollah: prediction_regions_org=prediction_regions_org[:,:,0] mask_lines_only=(prediction_regions_org[:,:]==3)*1 - if not self.dir_in: - session_region.close() - del model_region - del session_region - gc.collect() mask_texts_only=(prediction_regions_org[:,:]==1)*1 @@ -1859,19 +1795,11 @@ class Eynollah: text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) - if not self.dir_in: - K.clear_session() return text_regions_p_true, erosion_hurts, polygons_lines_xml except: if self.input_binary: prediction_bin = np.copy(img_org) - else: - if not self.dir_in: - session_region.close() - del model_region - del session_region - gc.collect() if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) @@ -1887,12 +1815,6 @@ class Eynollah: prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - if not self.dir_in: - session_bin.close() - del model_bin - del session_bin - gc.collect() if not self.dir_in: @@ -1910,11 +1832,6 @@ class Eynollah: prediction_regions_org=prediction_regions_org[:,:,0] #mask_lines_only=(prediction_regions_org[:,:]==3)*1 - if not self.dir_in: - session_region.close() - del model_region - del session_region - gc.collect() #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) @@ -1925,12 +1842,6 @@ class 
Eynollah: #prediction_regions_org = prediction_regions_org[:,:,0] #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 - #session_region.close() - #del model_region - #del session_region - #gc.collect() - - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 @@ -1957,8 +1868,6 @@ class Eynollah: text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) erosion_hurts = True - if not self.dir_in: - K.clear_session() return text_regions_p_true, erosion_hurts, polygons_lines_xml def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): @@ -2515,10 +2424,6 @@ class Eynollah: prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) - - del model_region - del session_region - gc.collect() return prediction_table_erode.astype(np.int16) @@ -2619,8 +2524,7 @@ class Eynollah: self.logger.info("Resizing and enhancing image...") is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') - if not self.dir_in: - K.clear_session() + scale = 1 if is_image_enhanced: if self.allow_enhancement: @@ -2646,8 +2550,6 @@ class Eynollah: textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) - if not self.dir_in: - K.clear_session() if self.plotter: self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page) return textline_mask_tot_ea @@ -2660,7 +2562,7 @@ class Eynollah: if self.plotter: self.plotter.save_deskewed_image(slope_deskew) - self.logger.info("slope_deskew: %s", slope_deskew) + self.logger.info("slope_deskew: %.2f°", slope_deskew) return slope_deskew, slope_first def run_marginals(self, image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): @@ -2709,8 +2611,6 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if not self.dir_in: - K.clear_session() self.logger.info("num_col_classifier: %s", num_col_classifier) @@ -2775,8 +2675,6 @@ class Eynollah: pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - if not self.dir_in: - K.clear_session() self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables @@ -2807,9 +2705,6 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier, self.tables, pixel_lines) - if not self.dir_in: - K.clear_session() - gc.collect() if num_col_classifier>=3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -2875,38 +2770,22 @@ class Eynollah: text_regions_p[:, :][text_regions_p[:, :] == 2] = 5 text_regions_p[:, 
:][text_regions_p[:, :] == 3] = 6 text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 - if not self.dir_in: - K.clear_session() + image_page = image_page.astype(np.uint8) regions_fully, regions_fully_only_drop = self.extract_text_regions(image_page, True, cols=num_col_classifier) text_regions_p[:,:][regions_fully[:,:,0]==6]=6 regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 - if not self.dir_in: - K.clear_session() - # plt.imshow(regions_fully[:,:,0]) - # plt.show() regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) - # plt.imshow(regions_fully[:,:,0]) - # plt.show() - if not self.dir_in: - K.clear_session() + regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) - # plt.imshow(regions_fully_np[:,:,0]) - # plt.show() if num_col_classifier > 2: regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 else: regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) - # plt.imshow(regions_fully_np[:,:,0]) - # plt.show() - if not self.dir_in: - K.clear_session() - # plt.imshow(regions_fully[:,:,0]) - # plt.show() regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() @@ -2929,8 +2808,6 @@ class Eynollah: regions_without_separators_d = None if not self.tables: regions_without_separators = (text_regions_p[:, :] == 1) * 1 - if not self.dir_in: - K.clear_session() img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') @@ -3025,13 +2902,12 @@ class Eynollah: contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) #self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > min_con_area] - + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -3042,14 +2918,14 @@ class Eynollah: contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) - areas_cnt_text_d = np.array([cv2.contourArea(contours_only_text_parent_d[j]) for j in range(len(contours_only_text_parent_d))]) + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) areas_cnt_text_d = areas_cnt_text_d / 
float(text_only_d.shape[0] * text_only_d.shape[1]) if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d=np.argsort(areas_cnt_text_d) - contours_only_text_parent_d=list(np.array(contours_only_text_parent_d)[index_con_parents_d] ) - areas_cnt_text_d=list(np.array(areas_cnt_text_d)[index_con_parents_d] ) + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = list(np.array(contours_only_text_parent_d)[index_con_parents_d]) + areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) @@ -3103,12 +2979,12 @@ class Eynollah: contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(contours_only_text_parent[j]) for j in range(len(contours_only_text_parent))]) + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [contours_only_text_parent[jz] for jz in range(len(contours_only_text_parent)) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > min_con_area] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent)[index_con_parents]) @@ -3146,8 +3022,6 @@ class Eynollah: all_found_texline_polygons = small_textlines_to_parent_adherence2(all_found_texline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_texline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_texline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - if not self.dir_in: - K.clear_session() if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: @@ -3167,8 +3041,6 @@ class Eynollah: if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - if not self.dir_in: - K.clear_session() pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) From abb0b293f549e6443eced4ce56bb86a9660b9b36 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 2 Apr 2023 14:07:51 +0200 Subject: [PATCH 48/67] use find_namespace_packages in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9abf158..f4dc6b1 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import setup, find_packages +from setuptools import 
find_namespace_packages, find_packages, setup from json import load install_requires = open('requirements.txt').read().split('\n') From 456fccb35e184db1e7a0e73965e446ed8993f41a Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 12 Apr 2023 23:59:46 +0200 Subject: [PATCH 49/67] use the SavedModel format --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 8706995..525e6c3 100644 --- a/Makefile +++ b/Makefile @@ -22,14 +22,14 @@ help: models: models_eynollah models_eynollah: models_eynollah.tar.gz - tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' + # tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' # tar xf models_eynollah_renamed.tar.gz - # tar xf 2022-04-05.SavedModel.tar.gz + tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' models_eynollah.tar.gz: # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' - wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' - # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' + wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' # Install with pip install: From 63d996880d42a6b49b0fa0d48f3c69b902f72d43 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 13 Apr 2023 00:42:04 +0200 Subject: [PATCH 50/67] include 3.8 in GitHub Actions --- .github/workflows/test-eynollah.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index de742f1..e06cb35 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7'] # '3.8' + python-version: ['3.7', '3.8'] steps: - uses: actions/checkout@v2 From f264eaf424237e11a5d1f2d6199a2d0805eb37af Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Thu, 13 Apr 2023 12:28:03 +0200 Subject: [PATCH 51/67] test CircleCI machine executor (more RAM?) 
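The Docker executor appears to run out of memory once the TensorFlow models are loaded for the smoke test; a machine executor gives the job a full VM and therefore more RAM. As a rough way to check what an executor actually provides — a hypothetical one-off debug command, not part of this change — something like the following can be run in a job step (`free` and `nproc` are standard on the Ubuntu machine images):

```sh
# hypothetical CI debug step: print available RAM and CPU count on the executor
free -h && nproc
```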
--- .circleci/config.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 23eb724..4ae0994 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -3,8 +3,8 @@ version: 2 jobs: build-python37: - docker: - - image: python:3.7 + machine: + - image: ubuntu-2004:2023.02.1 steps: - checkout - restore_cache: @@ -20,8 +20,8 @@ jobs: - run: make smoke-test build-python38: - docker: - - image: python:3.8 + machine: + - image: ubuntu-2004:2023.02.1 steps: - checkout - restore_cache: @@ -40,6 +40,5 @@ workflows: version: 2 build: jobs: - #- build-python36 - build-python37 - build-python38 From 0462ae0b975f2d7827aeb7cf1648cab4e559e1d3 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Thu, 13 Apr 2023 16:31:14 +0200 Subject: [PATCH 52/67] Update config.yml --- .circleci/config.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4ae0994..092a37c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -16,6 +16,9 @@ jobs: paths: models_eynollah.tar.gz models_eynollah + - run: + name: "Set Python Version" + command: pyenv install -s 3.7.16 && pyenv global 3.7.16 - run: make install - run: make smoke-test @@ -33,6 +36,9 @@ jobs: paths: models_eynollah.tar.gz models_eynollah + - run: + name: "Set Python Version" + command: pyenv install -s 3.8.16 && pyenv global 3.8.16 - run: make install - run: make smoke-test From cb8cfad76153bd23ac0fe0f1d0e23bb9dd81a546 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Thu, 13 Apr 2023 16:35:46 +0200 Subject: [PATCH 53/67] Update config.yml --- .circleci/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 092a37c..751ea54 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -17,8 +17,8 @@ jobs: models_eynollah.tar.gz models_eynollah - run: - name: "Set Python Version" - command: pyenv install -s 3.7.16 && pyenv global 3.7.16 + name: "Set Python Version" + command: pyenv install -s 3.7.16 && pyenv global 3.7.16 - run: make install - run: make smoke-test @@ -37,8 +37,8 @@ jobs: models_eynollah.tar.gz models_eynollah - run: - name: "Set Python Version" - command: pyenv install -s 3.8.16 && pyenv global 3.8.16 + name: "Set Python Version" + command: pyenv install -s 3.8.16 && pyenv global 3.8.16 - run: make install - run: make smoke-test From c251c4f4c80bd02c284c7f93712374016e006658 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 14 Apr 2023 02:11:51 +0200 Subject: [PATCH 54/67] update badges --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e400d51..f51012e 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ # Eynollah > Perform document layout analysis (segmentation) from image data and return the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) -[![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=svg)](https://circleci.com/gh/qurator-spk/eynollah) [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/) +[![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=shield)](https://circleci.com/gh/qurator-spk/eynollah) +[![GH Actions 
Test](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml/badge.svg)](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml) [![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://opensource.org/license/apache-2-0/) ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) From 50b9ce3350661ac6d3e7f64be3df792f4a0a3d24 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 14 Apr 2023 02:48:42 +0200 Subject: [PATCH 55/67] Update README.md --- README.md | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index f51012e..7ce0782 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Eynollah -> Perform document layout analysis (segmentation) from image data and return the results as [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) +> Document Layout Analysis (segmentation) using pre-trained models and heuristics [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/) [![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=shield)](https://circleci.com/gh/qurator-spk/eynollah) @@ -8,24 +8,38 @@ ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) +## Features +* Support for up to 10 segmentation classes: + * background, page border, text region, text line, header, image, separator, marginalia, initial (drop capital), table +* Support for various image optimization operations: + * cropping (border detection), binarization, deskewing, dewarping, scaling, enhancing, resizing +* Text line segmentation to bounding boxes or polygons (contours) including curved lines and vertical text +* Detection of reading order +* Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) format + ## Installation -`pip install .` or +Python versions `3.7-3.10` with Tensorflow `>=2.4` are currently supported. -`pip install -e .` for editable installation +For (minimal) GPU support the [matching](https://www.tensorflow.org/install/source#gpu) CUDA toolkit `>=10.1` needs to be installed. -Alternatively, you can also use `make` with these targets: +You can either install via -`make install` or +``` +pip install eynollah +``` -`make install-dev` for editable installation +or clone the repository, enter it and install (editable) with -The current version of Eynollah runs on Python `>=3.7` with Tensorflow `>=2.4`. +``` +git clone git@github.com:qurator-spk/eynollah.git +cd eynollah; pip install -e . +``` -In order to use a GPU for inference, the CUDA toolkit version 10.x needs to be installed. +Alternatively, you can run `make install` or `make install-dev` for editable installation. ### Models -In order to run this tool you need trained models. You can download our pretrained models from [qurator-data.de](https://qurator-data.de/eynollah/). +Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/). Alternatively, running `make models` will download and extract models to `$(PWD)/models_eynollah`. 
@@ -38,7 +52,11 @@ In case you want to train your own model to use with Eynollah, have a look at [s
 The command-line interface can be called like this:
 
 ```sh
-eynollah -i <image file> -o <output directory> -m <directory of models> [OPTIONS]
+eynollah \
+  -i <image file> \
+  -o <output directory> \
+  -m <directory of models> \
+  [OPTIONS]
 ```
 
 The following options can be used to further configure the processing:
@@ -182,5 +200,4 @@ would still use the original (RGB) image despite any binarization that may have
-
-</details>
+</details>
\ No newline at end of file

From d98689edad30cba72749d3abee65d908e6980282 Mon Sep 17 00:00:00 2001
From: cneud <952378+cneud@users.noreply.github.com>
Date: Fri, 14 Apr 2023 03:13:07 +0200
Subject: [PATCH 56/67] Update README.md

---
 README.md | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 7ce0782..dd4324a 100644
--- a/README.md
+++ b/README.md
@@ -13,14 +13,14 @@
   * background, page border, text region, text line, header, image, separator, marginalia, initial (drop capital), table
 * Support for various image optimization operations:
   * cropping (border detection), binarization, deskewing, dewarping, scaling, enhancing, resizing
-* Text line segmentation to bounding boxes or polygons (contours) including curved lines and vertical text
+* Text line segmentation to bounding boxes or polygons (contours) including for curved lines and vertical text
 * Detection of reading order
-* Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) format
+* Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML)
 
 ## Installation
 Python versions `3.7-3.10` with Tensorflow `>=2.4` are currently supported.
 
-For (minimal) GPU support the [matching](https://www.tensorflow.org/install/source#gpu) CUDA toolkit `>=10.1` needs to be installed.
+For (limited) GPU support the [matching](https://www.tensorflow.org/install/source#gpu) CUDA toolkit `>=10.1` needs to be installed.
 
@@ -43,8 +43,6 @@ Pre-trained models can be downloaded from [qurator-data
 
 Alternatively, running `make models` will download and extract models to `$(PWD)/models_eynollah`.
 
-### Training
-
 In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
 
 ## Usage
@@ -61,22 +59,21 @@ eynollah \
 
 The following options can be used to further configure the processing:
 
-```
--fl     perform full layout analysis including detection of headers and drop capitals
--tab    try to detect tables
--light  apply a faster but simpler method for main region detection and deskewing
--ae     allow resizing and enhancing the input image, the enhanced image is saved to the output directory
--as     allow scaling - automatically check whether the input image needs scaling or not
--ib     allow binarization of the input image
--ho     ignore headers for reading order prediction
--cl     extract contours of curved textlines instead of rectangle bounding boxes
--ep     enables plotting. This MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae` options
--di     process all images in a directory in batch mode
--si     save image regions detected in documents to this directory
--sd     save deskewed image to this directory
--sl     save layout prediction as plot to this directory
--sa     save all outputs (plot, enhanced or binary image and layout prediction) to this directory
-```
+| option | description |
+|----------|:-------------|
+| `-fl` | apply full layout analysis including all steps and segmentation classes |
+| `-light` | apply a lighter and faster but simpler method for main region detection and deskewing |
+| `-tab` | apply table detection |
+| `-ae` | apply enhancement (the resulting image is saved to the output directory) |
+| `-as` | apply scaling |
+| `-ib` | apply binarization (the resulting image is saved to the output directory) |
+| `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) |
+| `-ho` | ignore headers for reading order detection |
+| `-di <directory>` | process all images in a directory in batch mode |
+| `-si <directory>` | save image regions detected in documents to this directory |
+| `-sd <directory>` | save deskewed image to this directory |
+| `-sl <directory>` | save layout prediction as plot to this directory |
+| `-sa <directory>` | save all (plot, enhanced, binary image and layout prediction) to this directory |
 
 The tool performs better with RGB images as input than with greyscale or binarized images.

From 000e39c676ade539c5cb14bdbf7e64413ae71b59 Mon Sep 17 00:00:00 2001
From: cneud <952378+cneud@users.noreply.github.com>
Date: Fri, 14 Apr 2023 03:15:45 +0200
Subject: [PATCH 57/67] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dd4324a..85816d1 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 
 ## Features
 * Support for up to 10 segmentation classes:
-  * background, page border, text region, text line, header, image, separator, marginalia, initial (drop capital), table
+  * background, page border, text region, text line, header, image, separator, marginalia, initial, table

From fef7cf309b62b53dc153d01c17eba7a83af7b4d3 Mon Sep 17 00:00:00 2001
From: cneud <952378+cneud@users.noreply.github.com>
Date: Fri, 14 Apr 2023 03:21:24 +0200
Subject: [PATCH 58/67] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 85816d1..c6c6b2e 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,7 @@ The following options can be used to further configure the processing:
 | `-tab` | apply table detection |
 | `-ae` | apply enhancement (the resulting image is saved to the output directory) |
 | `-as` | apply scaling |
+| `-cl` | apply polygonal contour detection for curved text lines |
 | `-ib` | apply binarization (the resulting image is saved to the output directory) |
 | `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) |
 | `-ho` | ignore headers for reading order detection |
@@ -73,6 +74,7 @@ The following options can be used to further configure the processing:
 | `-si <directory>` | save image regions detected in documents to this directory |
 | `-sd <directory>` | save deskewed image to this directory |
 | `-sl <directory>` | save layout prediction as plot to this directory |
+| `-sp <directory>` | save cropped page image to this directory |
 | `-sa <directory>` | save all (plot, enhanced, binary image and layout prediction) to this directory |
 
 The tool performs better with RGB images as input than with greyscale or binarized images.

From 1e172cca5dff556d9b0ce7d68382c34cdc98e536 Mon Sep 17 00:00:00 2001
From: cneud <952378+cneud@users.noreply.github.com>
Date: Fri, 14 Apr 2023 03:25:01 +0200
Subject: [PATCH 59/67] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index c6c6b2e..35d36ca 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@
 * Text line segmentation to bounding boxes or polygons (contours) including for curved lines and vertical text
 * Detection of reading order
 * Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML)
+* [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface

From 70786377dcb0232180f69378ffea52e26cd9c476 Mon Sep 17 00:00:00 2001
From: cneud <952378+cneud@users.noreply.github.com>
Date: Fri, 14 Apr 2023 03:33:01 +0200
Subject: [PATCH 60/67] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 35d36ca..d5505ad 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 
 ## Features
 * Support for up to 10 segmentation classes:
-  * background, page border, text region, text line, header, image, separator, marginalia, initial, table
+  * background, [page border](https://ocr-d.de/en/gt-guidelines/trans/lyRand.html), [text region](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_TextRegionType.html), [text line](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_TextLineType.html), [header](https://ocr-d.de/en/gt-guidelines/trans/lyUeberschrift.html), [image](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_ImageRegionType.html), [separator](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_SeparatorRegionType.html), [marginalia](https://ocr-d.de/en/gt-guidelines/trans/lyMarginalie.html), [initial](https://ocr-d.de/en/gt-guidelines/trans/lyInitiale.html), [table](https://ocr-d.de/en/gt-guidelines/trans/lyTabellen.html)

From cb5ffaee141989df23fddf8d6e6824205e99bddc Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 14 Apr 2023 13:24:13 +0200
Subject: [PATCH 61/67] Update README.md

---
 README.md | 130 ++++++------------------------------------------------
 1 file changed, 14 insertions(+), 116 deletions(-)

diff --git a/README.md b/README.md
index d5505ad..4b7be73 100644
--- a/README.md
+++ b/README.md
@@ -65,9 +65,9 @@ The following options can be used to further configure the processing:
 | `-fl` | apply full layout analysis including all steps and segmentation classes |
 | `-light` | apply a lighter and faster but simpler method for main region detection and deskewing |
 | `-tab` | apply table detection |
-| `-ae` | apply enhancement (the resulting image is saved to the output directory) |
+| `-ae` | apply enhancement and adapt coordinates (the resulting image is saved to the output directory) |
 | `-as` | apply scaling |
-| `-cl` | apply polygonal contour detection for curved text lines |
+| `-cl` | apply polygonal contour detection for curved text lines instead of rectangular bounding boxes |
 | `-ib` | apply binarization (the resulting image is saved to the output directory) |
 | `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) |
 | `-ho` | ignore headers for reading order detection |
@@ -78,126 +78,24 @@ The following options can be used to further configure the processing:
 | `-sp <directory>` | save cropped page image to this directory |
 | `-sa <directory>` | save all (plot, enhanced, binary image and layout prediction) to this directory |
 
-The tool performs better with RGB images as input than with greyscale or binarized images.
+If no option is set, the tool will perform layout detection of main regions (background, text, images, separators and marginals).
 
-## Documentation
-
-<details>
-  <summary>click to expand/collapse</summary>
-
-### Region types
-
-<details>
-  <summary>click to expand/collapse</summary>
- -Eynollah can currently be used to detect the following region types/elements: -* [Border](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_BorderType.html) -* [Textregion](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_TextRegionType.html) -* [Textline](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_TextLineType.html) -* [Image](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_ImageRegionType.html) -* [Separator](https://ocr-d.de/en/gt-guidelines/pagexml/pagecontent_xsd_Complex_Type_pc_SeparatorRegionType.html) -* [Marginalia](https://ocr-d.de/en/gt-guidelines/trans/lyMarginalie.html) -* [Initial (Drop Capital)](https://ocr-d.de/en/gt-guidelines/trans/lyInitiale.html) - -In addition, the tool can detect the [ReadingOrder](https://ocr-d.de/en/gt-guidelines/trans/lyLeserichtung.html) of regions. The final goal is to feed the output to an OCR model. - -
- -### Method description - -
-  <summary>click to expand/collapse</summary>
- -Eynollah uses a combination of various models and heuristics (see flowchart below for the different stages and how they interact): -* [Border detection](https://github.com/qurator-spk/eynollah#border-detection) -* [Layout detection](https://github.com/qurator-spk/eynollah#layout-detection) -* [Textline detection](https://github.com/qurator-spk/eynollah#textline-detection) -* [Image enhancement](https://github.com/qurator-spk/eynollah#Image_enhancement) -* [Scale classification](https://github.com/qurator-spk/eynollah#Scale_classification) -* [Heuristic methods](https://https://github.com/qurator-spk/eynollah#heuristic-methods) - -The first three stages are based on [pixel-wise segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation). - -![](https://user-images.githubusercontent.com/952378/100619946-1936f680-331e-11eb-9297-6e8b4cab3c16.png) - -#### Border detection -For the purpose of text recognition (OCR) and in order to avoid noise being introduced from texts outside the printspace, one first needs to detect the border of the printed frame. This is done by a binary pixel-wise-segmentation model trained on a dataset of 2,000 documents where about 1,200 of them come from the [dhSegment](https://github.com/dhlab-epfl/dhSegment/) project (you can download the dataset from [here](https://github.com/dhlab-epfl/dhSegment/releases/download/v0.2/pages.zip)) and the remainder having been annotated in SBB. For border detection, the model needs to be fed with the whole image at once rather than separated in patches. - -### Layout detection -As a next step, text regions need to be identified by means of layout detection. Again a pixel-wise segmentation model was trained on 131 labeled images from the SBB digital collections, including some data augmentation. Since the target of this tool are historical documents, we consider as main region types text regions, separators, images, tables and background - each with their own subclasses, e.g. in the case of text regions, subclasses like header/heading, drop capital, main body text etc. While it would be desirable to detect and classify each of these classes in a granular way, there are also limitations due to having a suitably large and balanced training set. Accordingly, the current version of this tool is focussed on the main region types background, text region, image and separator. - -#### Textline detection -In a subsequent step, binary pixel-wise segmentation is used again to classify pixels in a document that constitute textlines. For textline segmentation, a model was initially trained on documents with only one column/block of text and some augmentation with regard to scaling. By fine-tuning the parameters also for multi-column documents, additional training data was produced that resulted in a much more robust textline detection model. - -#### Image enhancement -This is an image to image model which input was low quality of an image and label was actually the original image. For this one we did not have any GT, so we decreased the quality of documents in SBB and then feed them into model. - -#### Scale classification -This is simply an image classifier which classifies images based on their scales or better to say based on their number of columns. - -### Heuristic methods -Some heuristic methods are also employed to further improve the model predictions: -* After border detection, the largest contour is determined by a bounding box, and the image cropped to these coordinates. 
-* For text region detection, the image is scaled up to make it easier for the model to detect background space between text regions. -* A minimum area is defined for text regions in relation to the overall image dimensions, so that very small regions that are noise can be filtered out. -* Deskewing is applied on the text region level (due to regions having different degrees of skew) in order to improve the textline segmentation result. -* After deskewing, a calculation of the pixel distribution on the X-axis allows the separation of textlines (foreground) and background pixels. -* Finally, using the derived coordinates, bounding boxes are determined for each textline. - -
- -### How to use - -
-  <summary>click to expand/collapse</summary>
- -Eynollah makes use of up to 9 trained models which are responsible for different operations like size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection.That does not mean that all 9 models are always required for every document. Based on the document characteristics and parameters specified, different scenarios can be applied. - -* If none of the parameters is set to `true`, the tool will perform a layout detection of main regions (background, text, images, separators and marginals). An advantage of this tool is that it tries to extract main text regions separately as much as possible. - -* If you set `-ae` (**a**llow image **e**nhancement) parameter to `true`, the tool will first check the ppi (pixel-per-inch) of the image and when it is less than 300, the tool will resize it and only then image enhancement will occur. Image enhancement can also take place without this option, but by setting this option to `true`, the layout xml data (e.g. coordinates) will be based on the resized and enhanced image instead of the original image. - -* For some documents, while the quality is good, their scale is very large, and the performance of tool decreases. In such cases you can set `-as` (**a**llow **s**caling) to `true`. With this option enabled, the tool will try to rescale the image and only then the layout detection process will begin. - -* If you care about drop capitals (initials) and headings, you can set `-fl` (**f**ull **l**ayout) to `true`. With this setting, the tool can currently distinguish 7 document layout classes/elements. - -* In cases where the document includes curved headers or curved lines, rectangular bounding boxes for textlines will not be a great option. In such cases it is strongly recommended setting the flag `-cl` (**c**urved **l**ines) to `true` to find contours of curved lines instead of rectangular bounding boxes. Be advised that enabling this option increases the processing time of the tool. - -* To crop and save image regions inside the document, set the parameter `-si` (**s**ave **i**mages) to true and provide a directory path to store the extracted images. - -* This tool is actively being developed. If problems occur, or the performance does not meet your expectations, we welcome your feedback via [issues](https://github.com/qurator-spk/eynollah/issues). - -#### `--full-layout` vs `--no-full-layout` - -Here are the difference in elements detected depending on the `--full-layout`/`--no-full-layout` command line flags: - -| | `--full-layout` | `--no-full-layout` | -| --- | --- | --- | -| reading order | x | x | -| header regions | x | - | -| text regions | x | x | -| text regions / text line | x | x | -| drop-capitals | x | - | -| marginals | x | x | -| marginals / text line | x | x | -| image region | x | x | +The tool produces better output from RGB images as input than greyscale or binarized images. #### Use as OCR-D processor -Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor. In this case, the source image file group with (preferably) RGB images should be used as input like this: +Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor. -`ocrd-eynollah-segment -I OCR-D-IMG -O SEG-LINE -P models` - -In fact, the image referenced by `@imageFilename` in PAGE-XML is passed on directly to Eynollah as a processor, so that e.g. 
calling +In this case, the source image file group with (preferably) RGB images should be used as input like this: -`ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models` +``` +ocrd-eynollah-segment -I OCR-D-IMG -O SEG-LINE -P models +``` -would still use the original (RGB) image despite any binarization that may have occured in previous OCR-D processing steps +Any image referenced by `@imageFilename` in PAGE-XML is passed on directly to Eynollah as a processor, so that e.g. calling - #### Eynollah "light" - - Eynollah light uses a faster method to predict and extract the early layout. But with the light option enabled deskewing is not applied for any text region and done only once for the whole document. - -
+``` +ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models +``` -
\ No newline at end of file +still uses the original (RGB) image despite any binarization that may have occured in previous OCR-D processing steps From 529f2c0e19cd99da9735be6321da06657954d355 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Thu, 13 Apr 2023 19:02:41 +0200 Subject: [PATCH 62/67] set_memory_growth to all GPU devices alike --- qurator/eynollah/eynollah.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c9e6674..8444995 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -650,7 +650,8 @@ class Eynollah: #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) physical_devices = tf.config.list_physical_devices('GPU') try: - tf.config.experimental.set_memory_growth(physical_devices[0], True) + for device in physical_devices: + tf.config.experimental.set_memory_growth(device, True) except: self.logger.warning("no GPU device available") From 29e6ad076fdad494414bae6fb89ce8f59fc60ff8 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 18 Apr 2023 13:47:43 +0200 Subject: [PATCH 63/67] renaming textline light model --- qurator/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8444995..1346672 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -224,7 +224,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425.h5" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314.h5" if self.textline_light: - self.model_textline_dir = dir_models + "/model_17.h5" + self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425.h5" else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425.h5" self.model_tables = dir_models + "/eynollah-tables_20210319.h5" From 380f59ad675717145e4512c5ed9b9d361c4d6249 Mon Sep 17 00:00:00 2001 From: vahid Date: Tue, 18 Apr 2023 15:06:18 +0200 Subject: [PATCH 64/67] let hybrid textline light model be loaded --- qurator/eynollah/eynollah.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 1346672..4fecfed 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -665,8 +665,12 @@ class Eynollah: if model_dir in self.models: model = self.models[model_dir] else: - model = load_model(model_dir, compile=False) - self.models[model_dir] = model + try: + model = load_model(model_dir, compile=False) + self.models[model_dir] = model + except: + model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + self.models[model_dir] = model return model, None From d68f240b59cf6a47091d990d05cd74ba521d054c Mon Sep 17 00:00:00 2001 From: vahid Date: Thu, 27 Apr 2023 17:05:21 +0200 Subject: [PATCH 65/67] loading TensorFlow SavedModel format is now present --- qurator/eynollah/eynollah.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 4fecfed..ec65361 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -213,21 +213,21 @@ class Eynollah: self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models - 
self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425.h5" - self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425.h5" - self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425.h5" - self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425.h5" - self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425.h5" - self.model_region_dir_fully_np = dir_models + "/eynollah-full-regions-1column_20210425.h5" - self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425.h5" - self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425.h5" - self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425.h5" - self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314.h5" + self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" + self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" + self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" + self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" + self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" + self.model_region_dir_fully_np = dir_models + "/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" + self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" + self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" + self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" if self.textline_light: - self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425.h5" + self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: - self.model_textline_dir = dir_models + "/eynollah-textline_20210425.h5" - self.model_tables = dir_models + "/eynollah-tables_20210319.h5" + self.model_textline_dir = dir_models + "/eynollah-textline_20210425" + self.model_tables = dir_models + "/eynollah-tables_20210319" self.models = {} @@ -1824,6 +1824,9 @@ class Eynollah: if not self.dir_in: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + + else: + prediction_bin = np.copy(img_org) ratio_y=1 ratio_x=1 From 4c217018ccdff3a409aa4a8ff35fca02eedd3dca Mon Sep 17 00:00:00 2001 From: vahid Date: Thu, 27 Apr 2023 21:07:33 +0200 Subject: [PATCH 66/67] textline light version -tll can not work without enabling -light option --- qurator/eynollah/cli.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index ddf986e..8c42f64 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -164,6 +164,9 @@ def main( elif enable_plotting and not (save_layout or save_deskewed or save_all or save_page or save_images or allow_enhancement): print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa, -sp, -si or -ae") sys.exit(1) + if textline_light and not light_version: + print('Error: You used -tll to enable light textline detection but -light is not enabled') + sys.exit(1) eynollah = Eynollah( image_filename=image, dir_out=out, From 48f2ce62034bbabefe909820ad8b9cee2ebda47f Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Sat, 13 May 2023 02:39:18 +0200 Subject: [PATCH 67/67] re-enable Action for Python 3.8 --- 
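Notes: this re-adds Python 3.8 to the GitHub Actions matrix. A rough local equivalent of the CI matrix — assuming `pyenv` is installed; the exact patch versions here are arbitrary — would be:

```sh
# illustrative only: run the smoke test against both Python lines locally
for v in 3.7.16 3.8.16; do
  pyenv install -s "$v" && pyenv local "$v"
  pip install -e . && make smoke-test
done
```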
.github/workflows/test-eynollah.yml | 2 +- README.md | 31 ++++++++++------------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index de742f1..e06cb35 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7'] # '3.8' + python-version: ['3.7', '3.8'] steps: - uses: actions/checkout@v2 diff --git a/README.md b/README.md index 1b3a589..07bb411 100644 --- a/README.md +++ b/README.md @@ -38,20 +38,7 @@ cd eynollah; pip install -e . Alternatively, you can run `make install` or `make install-dev` for editable installation. -
-  <summary>click to expand/collapse</summary>
-First, this model makes use of up to 9 trained models which are responsible for different operations like size detection, column classification, image enhancement, page extraction, main layout detection, full layout detection and textline detection. That does not mean that all 9 models are always required for every document. Based on the document characteristics and parameters specified, different scenarios can be applied.
-
-Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/).
-
-
-* If you set `-ae` (**a**llow image **e**nhancement) parameter to `true`, the tool will first check the ppi (pixel-per-inch) of the image and when it is less than 300, the tool will resize it and only then image enhancement will occur. Image enhancement can also take place without this option, but by setting this option to `true`, the layout xml data (e.g. coordinates) will be based on the resized and enhanced image instead of the original image.
-
-In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
 
 ## Usage
-
 The command-line interface can be called like this:
 
 ```sh
@@ -66,26 +53,31 @@ The following options can be used to further configure the processing:
 
 | option | description |
 |----------|:-------------|
-| `-fl` | apply full layout analysis including all steps and segmentation classes |
-| `-light` | apply a lighter and faster but simpler method for main region detection and deskewing |
+| `-fl` | full layout analysis including all steps and segmentation classes |
+| `-light` | lighter and faster but simpler method for main region detection and deskewing |
 | `-tab` | apply table detection |
-| `-ae` | apply enhancement and adapt coordinates (the resulting image is saved to the output directory) |
+| `-ae` | apply enhancement (the resulting image is saved to the output directory) |
 | `-as` | apply scaling |
-| `-cl` | apply polygonal contour detection for curved text lines instead of rectangular bounding boxes |
+| `-cl` | apply contour detection for curved text lines instead of bounding boxes |
 | `-ib` | apply binarization (the resulting image is saved to the output directory) |
 | `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) |
 | `-ho` | ignore headers for reading order detection |
 | `-di <directory>` | process all images in a directory in batch mode |
-| `-si <directory>` | save image regions detected in documents to this directory |
+| `-si <directory>` | save image regions detected to this directory |
 | `-sd <directory>` | save deskewed image to this directory |
 | `-sl <directory>` | save layout prediction as plot to this directory |
 | `-sp <directory>` | save cropped page image to this directory |
-| `-sa <directory>` | save all (plot, enhanced, binary image and layout prediction) to this directory |
+| `-sa <directory>` | save all (plot, enhanced/binary image, layout) to this directory |
 
 If no option is set, the tool will perform layout detection of main regions (background, text, images, separators and marginals).
 
 The tool produces better output from RGB images as input than greyscale or binarized images.
 
+## Models
+Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/).
+
+In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
+
 #### Use as OCR-D processor
 
 Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor.
@@ -103,4 +95,3 @@ ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models
 ```
 
 still uses the original (RGB) image despite any binarization that may have occurred in previous OCR-D processing steps
-