diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index 5299d3e..30e180d 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -289,7 +289,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_region_dir_p_ens_light_only_images_extraction = dir_models + "/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18" - self.model_reading_order_dir = dir_models + "/model_step_4800000_mb_ro"#"/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_mb_ro_aug_ens_11"#"/model_step_3200000_mb_ro"#"/model_ens_reading_order_machine_based"#"/model_mb_ro_aug_ens_8"#"/model_ens_reading_order_machine_based" #"/modelens_12sp_elay_0_3_4__3_6_n" #"/modelens_earlylayout_12spaltige_2_3_5_6_7_8" #"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18" @@ -725,6 +725,7 @@ class Eynollah: label_p_pred = self.model_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 + elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): if self.input_binary: img_in = np.copy(img) @@ -3090,6 +3091,26 @@ class Eynollah: num_col = num_col + 1 if not num_column_is_classified: num_col_classifier = num_col + 1 + if self.num_col_upper and self.num_col_lower: + if self.num_col_upper == self.num_col_lower: + num_col_classifier = self.num_col_upper + else: + if num_col_classifier < self.num_col_lower: + num_col_classifier = self.num_col_lower + if num_col_classifier > self.num_col_upper: + num_col_classifier = self.num_col_upper + + elif self.num_col_lower and not self.num_col_upper: + if num_col_classifier < self.num_col_lower: + num_col_classifier = self.num_col_lower + + elif self.num_col_upper and not self.num_col_lower: + if num_col_classifier > self.num_col_upper: + num_col_classifier = self.num_col_upper + + else: + pass + except Exception as why: self.logger.error(why) num_col = None @@ -3223,7 +3244,6 @@ class Eynollah: text_regions_p_1[mask_lines[:, :] == 1] = 3 text_regions_p = text_regions_p_1[:, :] text_regions_p = np.array(text_regions_p) - if num_col_classifier in (1, 2): try: regions_without_separators = (text_regions_p[:, :] == 1) * 1 @@ -4447,6 +4467,43 @@ class Eynollah: return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort) + + def separate_marginals_to_left_and_right_and_order_from_top_to_down(self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width): + cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours( + polygons_of_marginals) + + cx_marg = np.array(cx_marg) + cy_marg = np.array(cy_marg) + + poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] ) + poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] ) + + all_found_textline_polygons_marginals_left = list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] ) + all_found_textline_polygons_marginals_right = list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] ) + + all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] ) + all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] ) + + slopes_marg_left = list( np.array(slopes_marginals)[cx_marg < mid_point_of_page_width] ) + slopes_marg_right = list( np.array(slopes_marginals)[cx_marg >= mid_point_of_page_width] ) + + cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width] + cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width] + + ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), key=lambda x: x[0])] + ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), key=lambda x: x[0])] + + ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, all_found_textline_polygons_marginals_left), key=lambda x: x[0])] + ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, all_found_textline_polygons_marginals_right), key=lambda x: x[0])] + + ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, all_box_coord_marginals_left), key=lambda x: x[0])] + ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, all_box_coord_marginals_right), key=lambda x: x[0])] + + ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), key=lambda x: x[0])] + ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), key=lambda x: x[0])] + + return ordered_left_marginals, ordered_right_marginals, ordered_left_marginals_textline, ordered_right_marginals_textline, ordered_left_marginals_bbox, ordered_right_marginals_bbox, ordered_left_slopes_marginals, ordered_right_slopes_marginals + def run(self, image_filename : Optional[str] = None, dir_in : Optional[str] = None, overwrite : bool = False): """ @@ -4489,12 +4546,13 @@ class Eynollah: t0 = time.time() img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) + if self.extract_only_images: text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \ self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier) pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], - polygons_of_images, [], [], [], [], [], + polygons_of_images, [], [], [], [], [], [], [], [], [], cont_page, [], []) if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) @@ -4508,7 +4566,6 @@ class Eynollah: page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = \ self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) - ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) @@ -4530,10 +4587,14 @@ class Eynollah: id_of_texts_tot =['region_0001'] polygons_of_images = [] - slopes_marginals = [] - polygons_of_marginals = [] - all_found_textline_polygons_marginals = [] - all_box_coord_marginals = [] + slopes_marginals_left = [] + slopes_marginals_right = [] + polygons_of_marginals_left = [] + polygons_of_marginals_right = [] + all_found_textline_polygons_marginals_left = [] + all_found_textline_polygons_marginals_right = [] + all_box_coord_marginals_left = [] + all_box_coord_marginals_right = [] polygons_lines_xml = [] contours_tables = [] conf_contours_textregions =[0] @@ -4546,8 +4607,8 @@ class Eynollah: pcgts = self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, + all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order) return pcgts @@ -4595,11 +4656,10 @@ class Eynollah: #self.logger.info('cont_page %s', cont_page) #plt.imshow(table_prediction) #plt.show() - if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") pcgts = self.writer.build_pagexml_no_full_layout( - [], page_coord, [], [], [], [], [], [], [], [], [], [], + [], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [], cont_page, [], []) return pcgts @@ -4771,6 +4831,7 @@ class Eynollah: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] #contours_only_text_parent = [] + if not len(contours_only_text_parent): # stop early empty_marginals = [[]] * len(polygons_of_marginals) @@ -4778,13 +4839,13 @@ class Eynollah: pcgts = self.writer.build_pagexml_full_layout( [], [], page_coord, [], [], [], [], [], [], polygons_of_images, contours_tables, [], - polygons_of_marginals, empty_marginals, empty_marginals, [], [], [], + polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], [], cont_page, polygons_lines_xml) else: pcgts = self.writer.build_pagexml_no_full_layout( [], page_coord, [], [], [], [], polygons_of_images, - polygons_of_marginals, empty_marginals, empty_marginals, [], [], + polygons_of_marginals, polygons_of_marginals, empty_marginals, empty_marginals, empty_marginals, empty_marginals, [], [], [], cont_page, polygons_lines_xml, contours_tables) return pcgts @@ -4877,8 +4938,11 @@ class Eynollah: num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - - #print("text region early 6 in %.1fs", time.time() - t0) + + mid_point_of_page_width = text_regions_p.shape[1] / 2. + polygons_of_marginals_left, polygons_of_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes_marginals_left, slopes_marginals_right = self.separate_marginals_to_left_and_right_and_order_from_top_to_down(polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width) + + #print(len(polygons_of_marginals), len(ordered_left_marginals), len(ordered_right_marginals), 'marginals ordred') if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( @@ -4961,7 +5025,6 @@ class Eynollah: tror = time.time() order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - print('time spend for mb ro', time.time()-tror) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions( @@ -4978,10 +5041,15 @@ class Eynollah: else: ocr_all_textlines = None - if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: - ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: + ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: - ocr_all_textlines_marginals = None + ocr_all_textlines_marginals_left = None + + if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: + ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + else: + ocr_all_textlines_marginals_right = None if all_found_textline_polygons_h and len(all_found_textline_polygons)>0: ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_h, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) @@ -4994,15 +5062,16 @@ class Eynollah: ocr_all_textlines_drop = None else: ocr_all_textlines = None - ocr_all_textlines_marginals = None + ocr_all_textlines_marginals_left = None + ocr_all_textlines_marginals_right = None ocr_all_textlines_h = None ocr_all_textlines_drop = None pcgts = self.writer.build_pagexml_full_layout( contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, - polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, - cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) + polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) return pcgts contours_only_text_parent_h = None @@ -5077,19 +5146,24 @@ class Eynollah: gc.collect() if len(all_found_textline_polygons)>0: ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - if all_found_textline_polygons_marginals and len(all_found_textline_polygons_marginals)>0: - ocr_all_textlines_marginals = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: + ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_left, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + + if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: + ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons_marginals_right, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) else: ocr_all_textlines = None - ocr_all_textlines_marginals = None + ocr_all_textlines_marginals_left = None + ocr_all_textlines_marginals_right = None self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_no_full_layout( txt_con_org, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, - all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, - cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals, conf_contours_textregions) + all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions) return pcgts @@ -5145,7 +5219,7 @@ class Eynollah_ocr: self.b_s = int(batch_size) else: - self.model_ocr_dir = dir_models + "/model_eynollah_ocr_cnnrnn_20250805" + self.model_ocr_dir = dir_models + "/model_step_45000_ocr"#"/model_eynollah_ocr_cnnrnn_20250805"# model_ocr = load_model(self.model_ocr_dir , compile=False) self.prediction_model = tf.keras.models.Model( @@ -5156,7 +5230,7 @@ class Eynollah_ocr: else: self.b_s = int(batch_size) - with open(os.path.join(self.model_ocr_dir, "characters_org.txt"),"r") as config_file: + with open(os.path.join(self.model_ocr_dir, "characters_20250707_all_lang.txt"),"r") as config_file: characters = json.load(config_file) AUTOTUNE = tf.data.AUTOTUNE diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index 7625a90..c03d831 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -64,7 +64,7 @@ class machine_based_reading_order_on_layout: self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200) atexit.register(self.executor.shutdown) self.dir_models = dir_models - self.model_reading_order_dir = dir_models + "/model_step_5100000_mb_ro"#"/model_ens_reading_order_machine_based" + self.model_reading_order_dir = dir_models + "/model_eynollah_reading_order_20250824"#"/model_ens_reading_order_machine_based" try: for device in tf.config.list_physical_devices('GPU'): @@ -942,10 +942,18 @@ class machine_based_reading_order_on_layout: x_len = text_regions_p.shape[1] img_poly = np.zeros((y_len,x_len), dtype='uint8') - img_poly[text_regions_p[:,:]==1] = 1 - img_poly[text_regions_p[:,:]==2] = 2 - img_poly[text_regions_p[:,:]==3] = 4 - img_poly[text_regions_p[:,:]==6] = 5 + ###img_poly[text_regions_p[:,:]==1] = 1 + ###img_poly[text_regions_p[:,:]==2] = 2 + ###img_poly[text_regions_p[:,:]==3] = 4 + ###img_poly[text_regions_p[:,:]==6] = 5 + + ##img_poly[text_regions_p[:,:]==1] = 1 + ##img_poly[text_regions_p[:,:]==2] = 2 + ##img_poly[text_regions_p[:,:]==3] = 3 + ##img_poly[text_regions_p[:,:]==4] = 4 + ##img_poly[text_regions_p[:,:]==5] = 5 + + img_poly = np.copy(text_regions_p) img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') if contours_only_text_parent_h: diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 1e9162a..d974650 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -384,57 +384,63 @@ def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, pr for indexing, ind_poly_first in enumerate(all_found_textline_polygons): #ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): + if len(ind_poly_first)==0: cropped_lines_region_indexer.append(indexer_text_region) - if not (textline_light or curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] + cropped_lines_meging_indexing.append(0) + img_fin = np.ones((image_height, image_width, 3))*1 + cropped_lines.append(img_fin) - ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - - w_scaled = w * image_height/float(h) + else: + for indexing2, ind_poly in enumerate(ind_poly_first): + cropped_lines_region_indexer.append(indexer_text_region) + if not (textline_light or curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] - mask_poly = np.zeros(image.shape) - - img_poly_on_img = np.copy(image) - - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - - - mask_poly = mask_poly[y:y+h, x:x+w, :] - img_crop = img_poly_on_img[y:y+h, x:x+w, :] - - img_crop[mask_poly==0] = 255 - - if w_scaled < 640:#1.5*image_width: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(0) - else: - splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) + ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) - if splited_images: - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(1) - - img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) - - cropped_lines.append(img_fin) - cropped_lines_meging_indexing.append(-1) - - else: + w_scaled = w * image_height/float(h) + + mask_poly = np.zeros(image.shape) + + img_poly_on_img = np.copy(image) + + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + + + mask_poly = mask_poly[y:y+h, x:x+w, :] + img_crop = img_poly_on_img[y:y+h, x:x+w, :] + + img_crop[mask_poly==0] = 255 + + if w_scaled < 640:#1.5*image_width: img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) cropped_lines.append(img_fin) cropped_lines_meging_indexing.append(0) + else: + splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None) + + if splited_images: + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[0], image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(1) + + img_fin = preprocess_and_resize_image_for_ocrcnn_model(splited_images[1], image_height, image_width) + + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(-1) + + else: + img_fin = preprocess_and_resize_image_for_ocrcnn_model(img_crop, image_height, image_width) + cropped_lines.append(img_fin) + cropped_lines_meging_indexing.append(0) indexer_text_region+=1 - extracted_texts = [] n_iterations = math.ceil(len(cropped_lines) / b_s_ocr) diff --git a/src/eynollah/utils/xml.py b/src/eynollah/utils/xml.py index bd95702..13420df 100644 --- a/src/eynollah/utils/xml.py +++ b/src/eynollah/utils/xml.py @@ -46,16 +46,22 @@ def create_page_xml(imageFilename, height, width): )) return pcgts -def xml_reading_order(page, order_of_texts, id_of_marginalia): +def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right): region_order = ReadingOrderType() og = OrderedGroupType(id="ro357564684568544579089") page.set_ReadingOrder(region_order) region_order.set_OrderedGroup(og) region_counter = EynollahIdCounter() + + for id_marginal in id_of_marginalia_left: + og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) + region_counter.inc('region') + for idx_textregion, _ in enumerate(order_of_texts): og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(order_of_texts[idx_textregion] + 1))) region_counter.inc('region') - for id_marginal in id_of_marginalia: + + for id_marginal in id_of_marginalia_right: og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) region_counter.inc('region') diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 085ee6f..2f9caf3 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -170,7 +170,7 @@ class EynollahXmlWriter(): with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals=None, conf_contours_textregion=None, skip_layout_reading_order=False): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, conf_contours_textregion=None, skip_layout_reading_order=False): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -181,8 +181,9 @@ class EynollahXmlWriter(): counter = EynollahIdCounter() if len(found_polygons_text_region) > 0: _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia = [_counter_marginals.next_region_id for _ in found_polygons_marginals] - xml_reading_order(page, order_of_texts, id_of_marginalia) + id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left] + id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] + xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', @@ -195,17 +196,29 @@ class EynollahXmlWriter(): else: ocr_textlines = None self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) - - for mm in range(len(found_polygons_marginals)): + + for mm in range(len(found_polygons_marginals_left)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) page.add_TextRegion(marginal) - if ocr_all_textlines_marginals: - ocr_textlines = ocr_all_textlines_marginals[mm] + if ocr_all_textlines_marginals_left: + ocr_textlines = ocr_all_textlines_marginals_left[mm] else: ocr_textlines = None - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_textlines) + #print(ocr_textlines, mm, len(all_found_textline_polygons_marginals_left[mm]) ) + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) + + for mm in range(len(found_polygons_marginals_right)): + marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord))) + page.add_TextRegion(marginal) + if ocr_all_textlines_marginals_right: + ocr_textlines = ocr_all_textlines_marginals_right[mm] + else: + ocr_textlines = None + + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) for mm in range(len(found_polygons_text_region_img)): img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType()) @@ -249,7 +262,7 @@ class EynollahXmlWriter(): return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals_left,found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -259,8 +272,9 @@ class EynollahXmlWriter(): counter = EynollahIdCounter() _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia = [_counter_marginals.next_region_id for _ in found_polygons_marginals] - xml_reading_order(page, order_of_texts, id_of_marginalia) + id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left] + id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] + xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', @@ -285,15 +299,25 @@ class EynollahXmlWriter(): ocr_textlines = None self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) - for mm in range(len(found_polygons_marginals)): + for mm in range(len(found_polygons_marginals_left)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) page.add_TextRegion(marginal) - if ocr_all_textlines_marginals: - ocr_textlines = ocr_all_textlines_marginals[mm] + if ocr_all_textlines_marginals_left: + ocr_textlines = ocr_all_textlines_marginals_left[mm] else: ocr_textlines = None - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_textlines) + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) + + for mm in range(len(found_polygons_marginals_right)): + marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', + Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord))) + page.add_TextRegion(marginal) + if ocr_all_textlines_marginals_right: + ocr_textlines = ocr_all_textlines_marginals_right[mm] + else: + ocr_textlines = None + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) for mm in range(len(found_polygons_drop_capitals)): dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital',