diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 9d5b2c8..466e690 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -65,7 +65,12 @@ jobs: run: | python -m pip install --upgrade pip make install-dev EXTRAS=OCR,plotting - make deps-test + make deps-test EXTRAS=OCR,plotting + ls -l models_* + - name: Lint with ruff + uses: astral-sh/ruff-action@v3 + with: + src: "./src" - name: Test with pytest run: make coverage PYTEST_ARGS="-vv --junitxml=pytest.xml" - name: Get coverage results diff --git a/CHANGELOG.md b/CHANGELOG.md index f6776d6..ab3dd83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,11 +15,17 @@ Fixed: * `get_smallest_skew`: after shifting search range of rotation angle, use overall best result * Dockerfile: fix CUDA installation (cuDNN contested between Torch and TF due to extra OCR) * OCR: re-instate missing methods and fix `utils_ocr` function calls + * mbreorder/enhancement CLIs: missing imports * :fire: writer: `SeparatorRegion` needs `SeparatorRegionType` (not `ImageRegionType`) f458e3e * tests: switch from `pytest-subtests` to `parametrize` so we can use `pytest-isolate` (so CUDA memory gets freed between tests if running on GPU) +Added: + * test coverage for OCR options in `layout` + * test coverage for table detection in `layout` + * CI linting with ruff + Changed: * polygons: slightly widen for regions and lines, increase for separators @@ -28,7 +34,19 @@ Changed: but use shared memory if necessary, and switch back from `loky` to stdlib, and shutdown in `del()` instead of `atexit` * :fire: OCR: switch CNN-RNN model to `20250930` version compatible with TF 2.12 on CPU, too + * OCR: allow running `-tr` without `-fl`, too * :fire: writer: use `@type='heading'` instead of `'header'` for headings + * :fire: performance gains via refactoring (simplification, less copy-code, vectorization, + avoiding unused calculations, avoiding unnecessary 3-channel image operations) + * :fire: heuristic reading order detection: many improvements + - contour vs splitter box matching: + * contour must be contained in box exactly instead of heuristics + * make fallback center matching, center must be contained in box + - original vs deskewed contour matching: + * same min-area filter on both sides + * similar area score in addition to center proximity + * avoid duplicate and missing mappings by allowing N:M + matches and splitting+joining where necessary * CI: update+improve model caching diff --git a/Makefile b/Makefile index 357aa47..29dd877 100644 --- a/Makefile +++ b/Makefile @@ -58,6 +58,9 @@ help: # Download and extract models to $(PWD)/models_layout_v0_5_0 models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME) +# do not download these files if we already have the directories +.INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE) + $(BIN_MODELFILE): wget -O $@ $(BIN_MODEL) $(SEG_MODELFILE): @@ -90,26 +93,29 @@ deps-test: $(OCR_MODELNAME) endif deps-test: $(BIN_MODELNAME) $(SEG_MODELNAME) $(PIP) install -r requirements-test.txt +ifeq (OCR,$(findstring OCR, $(EXTRAS))) + ln -rs $(OCR_MODELNAME)/* $(SEG_MODELNAME)/ +endif smoke-test: TMPDIR != mktemp -d smoke-test: tests/resources/kant_aufklaerung_1784_0020.tif # layout analysis: - eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/models_layout_v0_5_0 + eynollah layout -i $< -o $(TMPDIR) -m $(CURDIR)/$(SEG_MODELNAME) fgrep -q http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 $(TMPDIR)/$(basename $( int(nxf) else int(nxf) - nyf 
= int(nyf) + 1 if nyf > int(nyf) else int(nyf) + nxf = math.ceil(img_w / float(width_mid)) + nyf = math.ceil(img_h / float(height_mid)) list_i_s = [] list_j_s = [] @@ -936,18 +944,10 @@ class Eynollah: img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - else: - index_x_d = i * width_mid - index_x_u = index_x_d + img_width_model - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model - else: - index_y_d = j * height_mid - index_y_u = index_y_d + img_height_model + index_x_d = i * width_mid + index_x_u = index_x_d + img_width_model + index_y_d = j * height_mid + index_y_u = index_y_d + img_height_model if index_x_u > img_w: index_x_u = img_w index_x_d = img_w - img_width_model @@ -1856,17 +1856,13 @@ class Eynollah: return sorted_textlines - def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): + def get_slopes_and_deskew_new_light2(self, contours_par, textline_mask_tot, boxes, slope_deskew): polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) - M_main_tot = [cv2.moments(polygons_of_textlines[j]) - for j in range(len(polygons_of_textlines))] + cx_main_tot, cy_main_tot = find_center_of_contours(polygons_of_textlines) + w_h_textlines = [cv2.boundingRect(polygon)[2:] for polygon in polygons_of_textlines] - w_h_textlines = [cv2.boundingRect(polygons_of_textlines[i])[2:] for i in range(len(polygons_of_textlines))] - cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - - args_textlines = np.array(range(len(polygons_of_textlines))) + args_textlines = np.arange(len(polygons_of_textlines)) all_found_textline_polygons = [] slopes = [] all_box_coord =[] @@ -1893,16 +1889,12 @@ class Eynollah: all_box_coord.append(crop_coor) return (all_found_textline_polygons, - boxes, - contours, - contours_par, all_box_coord, - np.array(range(len(contours_par))), slopes) def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): - return [], [], [], [], [], [], [] + return [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_light") with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: results = self.executor.map(partial(do_work_of_slopes_new_light, @@ -1910,15 +1902,15 @@ class Eynollah: slope_deskew=slope_deskew, textline_light=self.textline_light, logger=self.logger,), - boxes, contours, contours_par, range(len(contours_par))) + boxes, contours, contours_par) results = list(results) # exhaust prior to release - #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + #textline_polygons, box_coord, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_light") return tuple(zip(*results)) def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, boxes, slope_deskew): if not len(contours): - return [], [], [], [], [], [], [] + return [], [], [] self.logger.debug("enter get_slopes_and_deskew_new") with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: results = self.executor.map(partial(do_work_of_slopes_new, @@ -1928,16 +1920,16 @@ class Eynollah: KERNEL=KERNEL, logger=self.logger, 
plotter=self.plotter,), - boxes, contours, contours_par, range(len(contours_par))) + boxes, contours, contours_par) results = list(results) # exhaust prior to release - #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + #textline_polygons, box_coord, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new") return tuple(zip(*results)) - def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, boxes, + def get_slopes_and_deskew_new_curved(self, contours_par, textline_mask_tot, boxes, mask_texts_only, num_col, scale_par, slope_deskew): - if not len(contours): - return [], [], [], [], [], [], [] + if not len(contours_par): + return [], [], [] self.logger.debug("enter get_slopes_and_deskew_new_curved") with share_ndarray(textline_mask_tot) as textline_mask_tot_shared: with share_ndarray(mask_texts_only) as mask_texts_only_shared: @@ -1951,9 +1943,9 @@ class Eynollah: KERNEL=KERNEL, logger=self.logger, plotter=self.plotter,), - boxes, contours, contours_par, range(len(contours_par))) + boxes, contours_par) results = list(results) # exhaust prior to release - #textline_polygons, boxes, text_regions, text_regions_par, box_coord, index_text_con, slopes = zip(*results) + #textline_polygons, box_coord, slopes = zip(*results) self.logger.debug("exit get_slopes_and_deskew_new_curved") return tuple(zip(*results)) @@ -2055,45 +2047,6 @@ class Eynollah: (prediction_textline_longshot_true_size[:, :, 0]==1).astype(np.uint8)) - def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): - self.logger.debug('enter do_work_of_slopes') - slope_biggest = 0 - slopes_sub = [] - boxes_sub_new = [] - poly_sub = [] - for mv in range(len(boxes_per_process)): - crop_img, _ = crop_image_inside_box(boxes_per_process[mv], - np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) - crop_img = crop_img[:, :, 0] - crop_img = cv2.erode(crop_img, KERNEL, iterations=2) - try: - textline_con, hierarchy = return_contours_of_image(crop_img) - textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, - max_area=1, min_area=0.0008) - y_diff_mean = find_contours_mean_y_diff(textline_con_fil) - sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) - crop_img[crop_img > 0] = 1 - slope_corresponding_textregion = return_deskew_slop(crop_img, sigma_des, - logger=self.logger, plotter=self.plotter) - except Exception as why: - self.logger.error(why) - slope_corresponding_textregion = MAX_SLOPE - - if slope_corresponding_textregion == MAX_SLOPE: - slope_corresponding_textregion = slope_biggest - slopes_sub.append(slope_corresponding_textregion) - - cnt_clean_rot = textline_contours_postprocessing( - crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) - - poly_sub.append(cnt_clean_rot) - boxes_sub_new.append(boxes_per_process[mv]) - - q.put(slopes_sub) - poly.put(poly_sub) - box_sub.put(boxes_sub_new) - self.logger.debug('exit do_work_of_slopes') - def get_regions_light_v_extract_only_images(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_extract_images_only") erosion_hurts = False @@ -2514,376 +2467,139 @@ class Eynollah: self.logger.debug("exit get_regions_from_xy_2models") return text_regions_p_true, erosion_hurts, polygons_seplines - def do_order_of_regions_full_layout( + def do_order_of_regions( self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): - 
self.logger.debug("enter do_order_of_regions_full_layout") + self.logger.debug("enter do_order_of_regions") + contours_only_text_parent = np.array(contours_only_text_parent) + contours_only_text_parent_h = np.array(contours_only_text_parent_h) boxes = np.array(boxes, dtype=int) # to be on the safe side - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( + c_boxes = np.stack((0.5 * boxes[:, 2:4].sum(axis=1), + 0.5 * boxes[:, 0:2].sum(axis=1))) + cx_main, cy_main, mx_main, Mx_main, my_main, My_main, mxy_main = find_new_features_of_contours( contours_only_text_parent) - cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours( + cx_head, cy_head, mx_head, Mx_head, my_head, My_head, mxy_head = find_new_features_of_contours( contours_only_text_parent_h) - try: - arg_text_con = [] - for ii in range(len(cx_text_only)): + def match_boxes(only_centers: bool): + arg_text_con_main = np.zeros(len(contours_only_text_parent), dtype=int) + for ii in range(len(contours_only_text_parent)): check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80 >= boxes[jj][0] and - x_min_text_only[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main[ii] >= boxes[jj][2] and - y_cor_x_min_main[ii] < boxes[jj][3]): - arg_text_con.append(jj) + for jj, box in enumerate(boxes): + if ((cx_main[ii] >= box[0] and + cx_main[ii] < box[1] and + cy_main[ii] >= box[2] and + cy_main[ii] < box[3]) if only_centers else + (mx_main[ii] >= box[0] and + Mx_main[ii] < box[1] and + my_main[ii] >= box[2] and + My_main[ii] < box[3])): + arg_text_con_main[ii] = jj check_if_textregion_located_in_a_box = True break if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + - (cy_text_only[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con.append(ind_min) - args_contours = np.array(range(len(arg_text_con))) - arg_text_con_h = [] - for ii in range(len(cx_text_only_h)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (x_min_text_only_h[ii] + 80 >= boxes[jj][0] and - x_min_text_only_h[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main_h[ii] >= boxes[jj][2] and - y_cor_x_min_main_h[ii] < boxes[jj][3]): - arg_text_con_h.append(jj) - check_if_textregion_located_in_a_box = True - break - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + - (cy_text_only_h[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con_h.append(ind_min) - args_contours_h = np.array(range(len(arg_text_con_h))) + dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_main[ii]], [cx_main[ii]]]), axis=0) + pcontained_in_box = ((boxes[:, 2] <= cy_main[ii]) & (cy_main[ii] < boxes[:, 3]) & + (boxes[:, 0] <= cx_main[ii]) & (cx_main[ii] < boxes[:, 1])) + ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box)) + arg_text_con_main[ii] = ind_min + args_contours_main = np.arange(len(contours_only_text_parent)) + order_by_con_main = np.zeros_like(arg_text_con_main) - order_by_con_head = np.zeros(len(arg_text_con_h)) - order_by_con_main = np.zeros(len(arg_text_con)) + arg_text_con_head = np.zeros(len(contours_only_text_parent_h), dtype=int) + for ii in range(len(contours_only_text_parent_h)): + check_if_textregion_located_in_a_box = False + for 
jj, box in enumerate(boxes): + if ((cx_head[ii] >= box[0] and + cx_head[ii] < box[1] and + cy_head[ii] >= box[2] and + cy_head[ii] < box[3]) if only_centers else + (mx_head[ii] >= box[0] and + Mx_head[ii] < box[1] and + my_head[ii] >= box[2] and + My_head[ii] < box[3])): + arg_text_con_head[ii] = jj + check_if_textregion_located_in_a_box = True + break + if not check_if_textregion_located_in_a_box: + dists_tr_from_box = np.linalg.norm(c_boxes - np.array([[cy_head[ii]], [cx_head[ii]]]), axis=0) + pcontained_in_box = ((boxes[:, 2] <= cy_head[ii]) & (cy_head[ii] < boxes[:, 3]) & + (boxes[:, 0] <= cx_head[ii]) & (cx_head[ii] < boxes[:, 1])) + ind_min = np.argmin(np.ma.masked_array(dists_tr_from_box, ~pcontained_in_box)) + arg_text_con_head[ii] = ind_min + args_contours_head = np.arange(len(contours_only_text_parent_h)) + order_by_con_head = np.zeros_like(arg_text_con_head) ref_point = 0 order_of_texts_tot = [] id_of_texts_tot = [] - for iij in range(len(boxes)): - ys = slice(*boxes[iij][2:4]) - xs = slice(*boxes[iij][0:2]) - args_contours_box = args_contours[np.array(arg_text_con) == iij] - args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] - con_inter_box = [] - con_inter_box_h = [] + for iij, box in enumerate(boxes): + ys = slice(*box[2:4]) + xs = slice(*box[0:2]) + args_contours_box_main = args_contours_main[arg_text_con_main == iij] + args_contours_box_head = args_contours_head[arg_text_con_head == iij] + con_inter_box = contours_only_text_parent[args_contours_box_main] + con_inter_box_h = contours_only_text_parent_h[args_contours_box_head] - for box in args_contours_box: - con_inter_box.append(contours_only_text_parent[box]) - - for box in args_contours_box_h: - con_inter_box_h.append(contours_only_text_parent_h[box]) - - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( - textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, box[2]) order_of_texts, id_of_texts = order_and_id_of_texts( con_inter_box, con_inter_box_h, - matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) - indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] - indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] + indexes_sorted_main = indexes_sorted[kind_of_texts_sorted == 1] + indexes_by_type_main = index_by_kind_sorted[kind_of_texts_sorted == 1] + indexes_sorted_head = indexes_sorted[kind_of_texts_sorted == 2] + indexes_by_type_head = index_by_kind_sorted[kind_of_texts_sorted == 2] - for zahler, _ in enumerate(args_contours_box): + for zahler, _ in enumerate(args_contours_box_main): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box_main[indexes_by_type_main[zahler]]] = \ + np.flatnonzero(indexes_sorted == arg_order_v) + ref_point - for zahler, _ in enumerate(args_contours_box_h): + for zahler, _ in enumerate(args_contours_box_head): arg_order_v = indexes_sorted_head[zahler] - 
order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_head[args_contours_box_head[indexes_by_type_head[zahler]]] = \ + np.flatnonzero(indexes_sorted == arg_order_v) + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) id_of_texts_tot.append(id_of_texts[jji]) ref_point += len(id_of_texts) - order_of_texts_tot = [] - for tj1 in range(len(contours_only_text_parent)): - order_of_texts_tot.append(int(order_by_con_main[tj1])) - - for tj1 in range(len(contours_only_text_parent_h)): - order_of_texts_tot.append(int(order_by_con_head[tj1])) - - order_text_new = [] - for iii in range(len(order_of_texts_tot)): - order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - - except Exception as why: - self.logger.error(why) - arg_text_con = [] - for ii in range(len(cx_text_only)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (cx_text_only[ii] >= boxes[jj][0] and - cx_text_only[ii] < boxes[jj][1] and - cy_text_only[ii] >= boxes[jj][2] and - cy_text_only[ii] < boxes[jj][3]): - # this is valid if the center of region identify in which box it is located - arg_text_con.append(jj) - check_if_textregion_located_in_a_box = True - break - - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + - (cy_text_only[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con.append(ind_min) - args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) - - ############################# head - - arg_text_con_h = [] - for ii in range(len(cx_text_only_h)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (cx_text_only_h[ii] >= boxes[jj][0] and - cx_text_only_h[ii] < boxes[jj][1] and - cy_text_only_h[ii] >= boxes[jj][2] and - cy_text_only_h[ii] < boxes[jj][3]): - # this is valid if the center of region identify in which box it is located - arg_text_con_h.append(jj) - check_if_textregion_located_in_a_box = True - break - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only_h[ii] - boxes[jj][1]) ** 2 + - (cy_text_only_h[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con_h.append(ind_min) - args_contours_h = np.array(range(len(arg_text_con_h))) - order_by_con_head = np.zeros(len(arg_text_con_h)) - - ref_point = 0 - order_of_texts_tot = [] - id_of_texts_tot = [] - for iij, _ in enumerate(boxes): - ys = slice(*boxes[iij][2:4]) - xs = slice(*boxes[iij][0:2]) - args_contours_box = args_contours[np.array(arg_text_con) == iij] - args_contours_box_h = args_contours_h[np.array(arg_text_con_h) == iij] - con_inter_box = [] - con_inter_box_h = [] - - for box in args_contours_box: - con_inter_box.append(contours_only_text_parent[box]) - - for box in args_contours_box_h: - con_inter_box_h.append(contours_only_text_parent_h[box]) - - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( - textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - - order_of_texts, id_of_texts = order_and_id_of_texts( - con_inter_box, con_inter_box_h, - matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) - - indexes_sorted_main = 
np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_sorted_head = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 2] - indexes_by_type_head = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 2] - - for zahler, _ in enumerate(args_contours_box): - arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point - - for zahler, _ in enumerate(args_contours_box_h): - arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point - - for jji, _ in enumerate(id_of_texts): - order_of_texts_tot.append(order_of_texts[jji] + ref_point) - id_of_texts_tot.append(id_of_texts[jji]) - ref_point += len(id_of_texts) - - order_of_texts_tot = [] - for tj1 in range(len(contours_only_text_parent)): - order_of_texts_tot.append(int(order_by_con_main[tj1])) - - for tj1 in range(len(contours_only_text_parent_h)): - order_of_texts_tot.append(int(order_by_con_head[tj1])) - - order_text_new = [] - for iii in range(len(order_of_texts_tot)): - order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - - self.logger.debug("exit do_order_of_regions_full_layout") - return order_text_new, id_of_texts_tot - - def do_order_of_regions_no_full_layout( - self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): - - self.logger.debug("enter do_order_of_regions_no_full_layout") - boxes = np.array(boxes, dtype=int) # to be on the safe side - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( - contours_only_text_parent) + order_of_texts_tot = np.concatenate((order_by_con_main, + order_by_con_head)) + order_text_new = np.argsort(order_of_texts_tot) + return order_text_new, id_of_texts_tot try: - arg_text_con = [] - for ii in range(len(cx_text_only)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80 >= boxes[jj][0] and - x_min_text_only[ii] + 80 < boxes[jj][1] and - y_cor_x_min_main[ii] >= boxes[jj][2] and - y_cor_x_min_main[ii] < boxes[jj][3]): - arg_text_con.append(jj) - check_if_textregion_located_in_a_box = True - break - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + - (cy_text_only[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con.append(ind_min) - args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) - - ref_point = 0 - order_of_texts_tot = [] - id_of_texts_tot = [] - for iij in range(len(boxes)): - ys = slice(*boxes[iij][2:4]) - xs = slice(*boxes[iij][0:2]) - args_contours_box = args_contours[np.array(arg_text_con) == iij] - con_inter_box = [] - con_inter_box_h = [] - for i in range(len(args_contours_box)): - con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( - textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - - order_of_texts, id_of_texts = order_and_id_of_texts( - con_inter_box, con_inter_box_h, - matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) 
- - indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] - - for zahler, _ in enumerate(args_contours_box): - arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point - - for jji, _ in enumerate(id_of_texts): - order_of_texts_tot.append(order_of_texts[jji] + ref_point) - id_of_texts_tot.append(id_of_texts[jji]) - ref_point += len(id_of_texts) - - order_of_texts_tot = [] - for tj1 in range(len(contours_only_text_parent)): - order_of_texts_tot.append(int(order_by_con_main[tj1])) - - order_text_new = [] - for iii in range(len(order_of_texts_tot)): - order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + results = match_boxes(False) except Exception as why: self.logger.error(why) - arg_text_con = [] - for ii in range(len(cx_text_only)): - check_if_textregion_located_in_a_box = False - for jj in range(len(boxes)): - if (cx_text_only[ii] >= boxes[jj][0] and - cx_text_only[ii] < boxes[jj][1] and - cy_text_only[ii] >= boxes[jj][2] and - cy_text_only[ii] < boxes[jj][3]): - # this is valid if the center of region identify in which box it is located - arg_text_con.append(jj) - check_if_textregion_located_in_a_box = True - break - if not check_if_textregion_located_in_a_box: - dists_tr_from_box = [math.sqrt((cx_text_only[ii] - boxes[jj][1]) ** 2 + - (cy_text_only[ii] - boxes[jj][2]) ** 2) - for jj in range(len(boxes))] - ind_min = np.argmin(dists_tr_from_box) - arg_text_con.append(ind_min) - args_contours = np.array(range(len(arg_text_con))) - order_by_con_main = np.zeros(len(arg_text_con)) + results = match_boxes(True) - ref_point = 0 - order_of_texts_tot = [] - id_of_texts_tot = [] - for iij in range(len(boxes)): - ys = slice(*boxes[iij][2:4]) - xs = slice(*boxes[iij][0:2]) - args_contours_box = args_contours[np.array(arg_text_con) == iij] - con_inter_box = [] - con_inter_box_h = [] - for i in range(len(args_contours_box)): - con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( - textline_mask_tot[ys, xs], con_inter_box, con_inter_box_h, boxes[iij][2]) - - order_of_texts, id_of_texts = order_and_id_of_texts( - con_inter_box, con_inter_box_h, - matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) - - indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] - indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] - - for zahler, _ in enumerate(args_contours_box): - arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ - np.where(indexes_sorted == arg_order_v)[0][0] + ref_point - - for jji, _ in enumerate(id_of_texts): - order_of_texts_tot.append(order_of_texts[jji] + ref_point) - id_of_texts_tot.append(id_of_texts[jji]) - ref_point += len(id_of_texts) - - order_of_texts_tot = [] - - for tj1 in range(len(contours_only_text_parent)): - order_of_texts_tot.append(int(order_by_con_main[tj1])) - - order_text_new = [] - for iii in range(len(order_of_texts_tot)): - order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - - self.logger.debug("exit do_order_of_regions_no_full_layout") - return order_text_new, id_of_texts_tot + self.logger.debug("exit 
do_order_of_regions") + return results def check_iou_of_bounding_box_and_contour_for_tables( self, layout, table_prediction_early, pixel_table, num_col_classifier): layout_org = np.copy(layout) - layout_org[:,:,0][layout_org[:,:,0]==pixel_table] = 0 - layout = (layout[:,:,0]==pixel_table)*1 - - layout =np.repeat(layout[:, :, np.newaxis], 3, axis=2) - layout = layout.astype(np.uint8) - imgray = cv2.cvtColor(layout, cv2.COLOR_BGR2GRAY ) - _, thresh = cv2.threshold(imgray, 0, 255, 0) + layout_org[layout_org == pixel_table] = 0 + layout = (layout == pixel_table).astype(np.uint8) * 1 + _, thresh = cv2.threshold(layout, 0, 255, 0) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - cnt_size = np.array([cv2.contourArea(contours[j]) - for j in range(len(contours))]) + cnt_size = np.array([cv2.contourArea(cnt) for cnt in contours]) contours_new = [] - for i in range(len(contours)): - x, y, w, h = cv2.boundingRect(contours[i]) + for i, contour in enumerate(contours): + x, y, w, h = cv2.boundingRect(contour) iou = cnt_size[i] /float(w*h) *100 if iou<80: - layout_contour = np.zeros((layout_org.shape[0], layout_org.shape[1])) - layout_contour= cv2.fillPoly(layout_contour,pts=[contours[i]] ,color=(1,1,1)) + layout_contour = np.zeros(layout_org.shape[:2]) + layout_contour = cv2.fillPoly(layout_contour, pts=[contour] ,color=1) layout_contour_sum = layout_contour.sum(axis=0) layout_contour_sum_diff = np.diff(layout_contour_sum) @@ -2899,45 +2615,42 @@ class Eynollah: layout_contour=cv2.erode(layout_contour[:,:], KERNEL, iterations=5) layout_contour=cv2.dilate(layout_contour[:,:], KERNEL, iterations=5) - layout_contour =np.repeat(layout_contour[:, :, np.newaxis], 3, axis=2) layout_contour = layout_contour.astype(np.uint8) - - imgray = cv2.cvtColor(layout_contour, cv2.COLOR_BGR2GRAY ) - _, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(layout_contour, 0, 255, 0) contours_sep, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) for ji in range(len(contours_sep) ): contours_new.append(contours_sep[ji]) if num_col_classifier>=2: - only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image, - pts=[contours_sep[ji]], color=(1,1,1)) + only_recent_contour_image = np.zeros(layout.shape[:2]) + only_recent_contour_image = cv2.fillPoly(only_recent_contour_image, + pts=[contours_sep[ji]], color=1) table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. 
* table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in_in1') if iou_in>30: - layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) + layout_org = cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=pixel_table) else: pass else: - layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=3 * (pixel_table,)) + layout_org= cv2.fillPoly(layout_org, pts=[contours_sep[ji]], color=pixel_table) else: - contours_new.append(contours[i]) + contours_new.append(contour) if num_col_classifier>=2: - only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours[i]] ,color=(1,1,1)) + only_recent_contour_image = np.zeros(layout.shape[:2]) + only_recent_contour_image = cv2.fillPoly(only_recent_contour_image, pts=[contour],color=1) table_pixels_masked_from_early_pre = only_recent_contour_image * table_prediction_early iou_in = 100. * table_pixels_masked_from_early_pre.sum() / only_recent_contour_image.sum() #print(iou_in,'iou_in') if iou_in>30: - layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) + layout_org = cv2.fillPoly(layout_org, pts=[contour], color=pixel_table) else: pass else: - layout_org= cv2.fillPoly(layout_org, pts=[contours[i]], color=3 * (pixel_table,)) + layout_org = cv2.fillPoly(layout_org, pts=[contour], color=pixel_table) return layout_org, contours_new @@ -2984,16 +2697,10 @@ class Eynollah: pass boxes = np.array(boxes, dtype=int) # to be on the safe side - img_comm_e = np.zeros(image_revised_1.shape) - img_comm = np.repeat(img_comm_e[:, :, np.newaxis], 3, axis=2) - + img_comm = np.zeros(image_revised_1.shape, dtype=np.uint8) for indiv in np.unique(image_revised_1): - image_col=(image_revised_1==indiv)*255 - img_comm_in=np.repeat(image_col[:, :, np.newaxis], 3, axis=2) - img_comm_in=img_comm_in.astype(np.uint8) - - imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + image_col = (image_revised_1 == indiv).astype(np.uint8) * 255 + _, thresh = cv2.threshold(image_col, 0, 255, 0) contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) if indiv==pixel_table: @@ -3003,35 +2710,27 @@ class Eynollah: main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area=1, min_area=min_area) - img_comm = cv2.fillPoly(img_comm, pts = main_contours, color = (indiv, indiv, indiv)) - img_comm = img_comm.astype(np.uint8) + img_comm = cv2.fillPoly(img_comm, pts=main_contours, color=indiv) if not self.isNaN(slope_mean_hor): - image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1],3)) + image_revised_last = np.zeros(image_regions_eraly_p.shape[:2]) for i in range(len(boxes)): box_ys = slice(*boxes[i][2:4]) box_xs = slice(*boxes[i][0:2]) image_box = img_comm[box_ys, box_xs] try: - image_box_tabels_1=(image_box[:,:,0]==pixel_table)*1 + image_box_tabels_1 = (image_box == pixel_table) * 1 contours_tab,_=return_contours_of_image(image_box_tabels_1) contours_tab=filter_contours_area_of_image_tables(image_box_tabels_1,contours_tab,_,1,0.003) - image_box_tabels_1=(image_box[:,:,0]==pixel_line)*1 + image_box_tabels_1 = (image_box == pixel_line).astype(np.uint8) * 1 + image_box_tabels_and_m_text = ( (image_box == pixel_table) | + (image_box == 1) ).astype(np.uint8) * 1 - image_box_tabels_and_m_text=( (image_box[:,:,0]==pixel_table) | (image_box[:,:,0]==1) )*1 - 
image_box_tabels_and_m_text=image_box_tabels_and_m_text.astype(np.uint8) + image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5) - image_box_tabels_1=image_box_tabels_1.astype(np.uint8) - image_box_tabels_1 = cv2.dilate(image_box_tabels_1,KERNEL,iterations = 5) - - contours_table_m_text,_=return_contours_of_image(image_box_tabels_and_m_text) - image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) - - image_box_tabels=image_box_tabels.astype(np.uint8) - imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_line,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + contours_table_m_text, _ = return_contours_of_image(image_box_tabels_and_m_text) + _, thresh = cv2.threshold(image_box_tabels_1, 0, 255, 0) + contours_line, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) y_min_main_line ,y_max_main_line=find_features_of_contours(contours_line) y_min_main_tab ,y_max_main_tab=find_features_of_contours(contours_tab) @@ -3063,18 +2762,20 @@ class Eynollah: y_max_main_tab[i_t] < y_min_main_line[i_l] and y_min_main_tab[i_t] < y_min_main_line[i_l]): pass - elif np.abs(y_max_main_line[i_l]-y_min_main_line[i_l])<100: + elif abs(y_max_main_line[i_l] - y_min_main_line[i_l]) < 100: pass else: - y_up_tab.append(np.min([y_min_main_line[i_l], y_min_main_tab[i_t] ]) ) - y_down_tab.append( np.max([ y_max_main_line[i_l],y_max_main_tab[i_t] ]) ) + y_up_tab.append(min([y_min_main_line[i_l], + y_min_main_tab[i_t]])) + y_down_tab.append(max([y_max_main_line[i_l], + y_max_main_tab[i_t]])) if len(y_up_tab)==0: y_up_tabs.append(y_min_main_tab[i_t]) y_down_tabs.append(y_max_main_tab[i_t]) else: - y_up_tabs.append(np.min(y_up_tab)) - y_down_tabs.append(np.max(y_down_tab)) + y_up_tabs.append(min(y_up_tab)) + y_down_tabs.append(max(y_down_tab)) else: y_down_tabs=[] y_up_tabs=[] @@ -3084,7 +2785,7 @@ class Eynollah: y_up_tabs=[] for ii in range(len(y_up_tabs)): - image_box[y_up_tabs[ii]:y_down_tabs[ii],:,0]=pixel_table + image_box[y_up_tabs[ii]:y_down_tabs[ii]] = pixel_table image_revised_last[box_ys, box_xs] = image_box else: @@ -3095,21 +2796,16 @@ class Eynollah: image_revised_last[box_ys, box_xs] = image_box if num_col_classifier==1: - img_tables_col_1 = (image_revised_last[:,:,0] == pixel_table).astype(np.uint8) + img_tables_col_1 = (image_revised_last == pixel_table).astype(np.uint8) contours_table_col1, _ = return_contours_of_image(img_tables_col_1) _,_ ,_ , _, y_min_tab_col1 ,y_max_tab_col1, _= find_new_features_of_contours(contours_table_col1) if len(y_min_tab_col1)>0: for ijv in range(len(y_min_tab_col1)): - image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]),:,:]=pixel_table + image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv])] = pixel_table return image_revised_last - def do_order_of_regions(self, *args, **kwargs): - if self.full_layout: - return self.do_order_of_regions_full_layout(*args, **kwargs) - return self.do_order_of_regions_no_full_layout(*args, **kwargs) - def get_tables_from_model(self, img, num_col_classifier): img_org = np.copy(img) img_height_h = img_org.shape[0] @@ -3445,13 +3141,11 @@ class Eynollah: pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_lines) + text_regions_p, num_col_classifier, self.tables, pixel_lines) if 
np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_lines) + text_regions_p_1_n, num_col_classifier, self.tables, pixel_lines) #print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) @@ -3477,7 +3171,7 @@ class Eynollah: pass else: text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + text_regions_p_tables[(table_prediction == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables, @@ -3498,8 +3192,8 @@ class Eynollah: pass else: text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 + text_regions_p_tables = np.round(text_regions_p_tables) + text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( @@ -3519,21 +3213,21 @@ class Eynollah: if self.tables: if self.light_version: - text_regions_p[:,:][table_prediction[:,:]==1] = 10 - img_revised_tab=text_regions_p[:,:] + text_regions_p[table_prediction == 1] = 10 + img_revised_tab = text_regions_p[:,:] else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) - img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1 + img_revised_tab = np.copy(img_revised_tab2) + img_revised_tab[(text_regions_p == 1) & (img_revised_tab != 10)] = 1 else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 + img_revised_tab = np.copy(text_regions_p) + img_revised_tab[img_revised_tab == 10] = 0 + img_revised_tab[img_revised_tab2_d_rotated == 10] = 10 - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 + text_regions_p[text_regions_p == 10] = 0 + text_regions_p[img_revised_tab == 10] = 10 else: - img_revised_tab=text_regions_p[:,:] + img_revised_tab = text_regions_p[:,:] #img_revised_tab = text_regions_p[:, :] if self.light_version: polygons_of_images = return_contours_of_interested_region(text_regions_p, 2) @@ -3627,13 +3321,11 @@ class Eynollah: pixel_lines=3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_lines) + text_regions_p, num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: num_col_d, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, pixel_lines) + text_regions_p_1_n, num_col_classifier, self.tables, pixel_lines) if num_col_classifier>=3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3665,7 +3357,7 @@ class Eynollah: num_col_classifier, erosion_hurts, self.tables, self.right2left) text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.round(text_regions_p_tables) - 
text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 + text_regions_p_tables[(text_regions_p_tables != 3) & (table_prediction_n == 1)] = 10 pixel_line = 3 img_revised_tab2 = self.add_tables_heuristic_to_layout( @@ -3684,17 +3376,17 @@ class Eynollah: text_regions_p.shape[1]) if np.abs(slope_deskew) < 0.13: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) + img_revised_tab = np.copy(img_revised_tab2) else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 + img_revised_tab = np.copy(text_regions_p) + img_revised_tab[img_revised_tab == 10] = 0 + img_revised_tab[img_revised_tab2_d_rotated == 10] = 10 - ##img_revised_tab=img_revised_tab2[:,:,0] - #img_revised_tab=text_regions_p[:,:] - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 - #img_revised_tab[img_revised_tab2[:,:,0]==10] =10 + ##img_revised_tab = img_revised_tab2[:,:] + #img_revised_tab = text_regions_p[:,:] + text_regions_p[text_regions_p == 10] = 0 + text_regions_p[img_revised_tab == 10] = 10 + #img_revised_tab[img_revised_tab2 == 10] = 10 pixel_img = 4 min_area_mar = 0.00001 @@ -3733,7 +3425,7 @@ class Eynollah: #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) - ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 + ##regions_fully[:, :, 0][regions_fully_only_drop[:, :] == 4] = 4 drop_capital_label_in_full_layout_model = 3 drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1 @@ -4208,7 +3900,7 @@ class Eynollah: return generated_text def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): - return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] + return list(np.array(ls_cons)[np.array(sorted_indexes)]) def return_it_in_two_groups(self, x_differential): split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 @@ -4237,47 +3929,38 @@ class Eynollah: def filter_contours_inside_a_bigger_one(self, contours, contours_d_ordered, image, marginal_cnts=None, type_contour="textregion"): - if type_contour=="textregion": - areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] + if type_contour == "textregion": + areas = np.array(list(map(cv2.contourArea, contours))) area_tot = image.shape[0]*image.shape[1] + areas_ratio = areas / area_tot + cx_main, cy_main = find_center_of_contours(contours) - M_main = [cv2.moments(contours[j]) - for j in range(len(contours))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + contours_index_small = np.flatnonzero(areas_ratio < 1e-3) + contours_index_large = np.flatnonzero(areas_ratio >= 1e-3) - areas_ratio = np.array(areas)/ area_tot - contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] - contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] - - #contours_> = [contours[ind] for ind in contours_index_big] + #contours_> = [contours[ind] for ind in contours_index_large] indexes_to_be_removed = [] for ind_small in contours_index_small: - results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) - for ind in contours_index_big] - if marginal_cnts: - 
results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], + results = [cv2.pointPolygonTest(contours[ind_large], (cx_main[ind_small], + cy_main[ind_small]), + False) + for ind_large in contours_index_large] + results = np.array(results) + if np.any(results==1): + indexes_to_be_removed.append(ind_small) + elif marginal_cnts: + results_marginal = [cv2.pointPolygonTest(marginal_cnt, (cx_main[ind_small], cy_main[ind_small]), False) - for ind in range(len(marginal_cnts))] + for marginal_cnt in marginal_cnts] results_marginal = np.array(results_marginal) - if np.any(results_marginal==1): indexes_to_be_removed.append(ind_small) - results = np.array(results) - - if np.any(results==1): - indexes_to_be_removed.append(ind_small) - - if len(indexes_to_be_removed)>0: - indexes_to_be_removed = np.unique(indexes_to_be_removed) - indexes_to_be_removed = np.sort(indexes_to_be_removed)[::-1] - for ind in indexes_to_be_removed: - contours.pop(ind) - if len(contours_d_ordered)>0: - contours_d_ordered.pop(ind) + contours = np.delete(contours, indexes_to_be_removed, axis=0) + if len(contours_d_ordered): + contours_d_ordered = np.delete(contours_d_ordered, indexes_to_be_removed, axis=0) return contours, contours_d_ordered @@ -4285,33 +3968,21 @@ class Eynollah: contours_txtline_of_all_textregions = [] indexes_of_textline_tot = [] index_textline_inside_textregion = [] + for ind_region, textlines in enumerate(contours): + contours_txtline_of_all_textregions.extend(textlines) + index_textline_inside_textregion.extend(list(range(len(textlines)))) + indexes_of_textline_tot.extend([ind_region] * len(textlines)) - for jj in range(len(contours)): - contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] - - ind_textline_inside_tr = list(range(len(contours[jj]))) - index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr - ind_ins = [jj] * len(contours[jj]) - indexes_of_textline_tot = indexes_of_textline_tot + ind_ins - - M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) - for j in range(len(contours_txtline_of_all_textregions))] - cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - - areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] + areas_tot = np.array(list(map(cv2.contourArea, contours_txtline_of_all_textregions))) area_tot_tot = image.shape[0]*image.shape[1] + cx_main_tot, cy_main_tot = find_center_of_contours(contours_txtline_of_all_textregions) - textregion_index_to_del = [] - textline_in_textregion_index_to_del = [] + textline_in_textregion_index_to_del = {} for ij in range(len(contours_txtline_of_all_textregions)): - args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) - args_all.pop(ij) - - areas_without = np.array(areas_tot)[args_all] area_of_con_interest = areas_tot[ij] - - args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest] + args_without = np.delete(np.arange(len(contours_txtline_of_all_textregions)), ij) + areas_without = areas_tot[args_without] + args_with_bigger_area = args_without[areas_without > 1.5*area_of_con_interest] if len(args_with_bigger_area)>0: results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], @@ -4322,18 +3993,17 @@ class Eynollah: results = np.array(results) if np.any(results==1): #print(indexes_of_textline_tot[ij], 
index_textline_inside_textregion[ij]) - textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) - textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) - #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij])) + textline_in_textregion_index_to_del.setdefault( + indexes_of_textline_tot[ij], list()).append( + index_textline_inside_textregion[ij]) + #contours[indexes_of_textline_tot[ij]].pop(index_textline_inside_textregion[ij]) - textregion_index_to_del = np.array(textregion_index_to_del) - textline_in_textregion_index_to_del = np.array(textline_in_textregion_index_to_del) - for ind_u_a_trs in np.unique(textregion_index_to_del): - textline_in_textregion_index_to_del_ind = \ - textline_in_textregion_index_to_del[textregion_index_to_del==ind_u_a_trs] - textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] - for ittrd in textline_in_textregion_index_to_del_ind: - contours[ind_u_a_trs].pop(ittrd) + for textregion_index_to_del in textline_in_textregion_index_to_del: + contours[textregion_index_to_del] = list(np.delete( + contours[textregion_index_to_del], + textline_in_textregion_index_to_del[textregion_index_to_del], + # needed so numpy does not flatten the entire result when 0 left + axis=0)) return contours @@ -4363,131 +4033,75 @@ class Eynollah: def filter_contours_without_textline_inside( - self, contours, text_con_org, contours_textline, + self, contours_par, contours_textline, contours_only_text_parent_d_ordered, conf_contours_textregions): - ###contours_txtline_of_all_textregions = [] - ###for jj in range(len(contours_textline)): - ###contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours_textline[jj] - ###M_main_textline = [cv2.moments(contours_txtline_of_all_textregions[j]) - ### for j in range(len(contours_txtline_of_all_textregions))] - ###cx_main_textline = [(M_main_textline[j]["m10"] / (M_main_textline[j]["m00"] + 1e-32)) - ### for j in range(len(M_main_textline))] - ###cy_main_textline = [(M_main_textline[j]["m01"] / (M_main_textline[j]["m00"] + 1e-32)) - ### for j in range(len(M_main_textline))] - - ###M_main = [cv2.moments(contours[j]) for j in range(len(contours))] - ###cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - ###cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - - ###contours_with_textline = [] - ###for ind_tr, con_tr in enumerate(contours): - ###results = [cv2.pointPolygonTest(con_tr, - ### (cx_main_textline[index_textline_con], - ### cy_main_textline[index_textline_con]), - ### False) - ### for index_textline_con in range(len(contours_txtline_of_all_textregions)) ] - ###results = np.array(results) - ###if np.any(results==1): - ###contours_with_textline.append(con_tr) - - textregion_index_to_del = set() - for index_textregion, textlines_textregion in enumerate(contours_textline): - if len(textlines_textregion) == 0: - textregion_index_to_del.add(index_textregion) + assert len(contours_par) == len(contours_textline) + indices = np.arange(len(contours_textline)) + indices = np.delete(indices, np.flatnonzero([len(lines) == 0 for lines in contours_textline])) def filterfun(lis): if len(lis) == 0: return [] - if len(textregion_index_to_del) == 0: - return lis - return list(np.delete(lis, list(textregion_index_to_del))) + return list(np.array(lis)[indices]) - return (filterfun(contours), - filterfun(text_con_org), - 
filterfun(conf_contours_textregions), + return (filterfun(contours_par), filterfun(contours_textline), filterfun(contours_only_text_parent_d_ordered), - np.arange(len(contours) - len(textregion_index_to_del))) + filterfun(conf_contours_textregions), + # indices + ) - def delete_regions_without_textlines( - self, slopes, all_found_textline_polygons, boxes_text, txt_con_org, - contours_only_text_parent, index_by_text_par_con): - - slopes_rem = [] - all_found_textline_polygons_rem = [] - boxes_text_rem = [] - txt_con_org_rem = [] - contours_only_text_parent_rem = [] - index_by_text_par_con_rem = [] - - for i, ind_con in enumerate(all_found_textline_polygons): - if len(ind_con): - all_found_textline_polygons_rem.append(ind_con) - slopes_rem.append(slopes[i]) - boxes_text_rem.append(boxes_text[i]) - txt_con_org_rem.append(txt_con_org[i]) - contours_only_text_parent_rem.append(contours_only_text_parent[i]) - index_by_text_par_con_rem.append(index_by_text_par_con[i]) - - index_sort = np.argsort(index_by_text_par_con_rem) - indexes_new = np.array(range(len(index_by_text_par_con_rem))) - - index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] - for j in range(len(index_by_text_par_con_rem))] - - return (slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, - contours_only_text_parent_rem, index_by_text_par_con_rem_sort) - def separate_marginals_to_left_and_right_and_order_from_top_to_down( self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes_marginals, mid_point_of_page_width): - cx_marg, cy_marg, _, _, _, _, _ = find_new_features_of_contours( - polygons_of_marginals) - + cx_marg, cy_marg = find_center_of_contours(polygons_of_marginals) cx_marg = np.array(cx_marg) cy_marg = np.array(cy_marg) + + def split(lis): + array = np.array(lis) + return (list(array[cx_marg < mid_point_of_page_width]), + list(array[cx_marg >= mid_point_of_page_width])) + + (poly_marg_left, + poly_marg_right) = \ + split(polygons_of_marginals) + + (all_found_textline_polygons_marginals_left, + all_found_textline_polygons_marginals_right) = \ + split(all_found_textline_polygons_marginals) - poly_marg_left = list( np.array(polygons_of_marginals)[cx_marg < mid_point_of_page_width] ) - poly_marg_right = list( np.array(polygons_of_marginals)[cx_marg >= mid_point_of_page_width] ) + (all_box_coord_marginals_left, + all_box_coord_marginals_right) = \ + split(all_box_coord_marginals) - all_found_textline_polygons_marginals_left = \ - list( np.array(all_found_textline_polygons_marginals)[cx_marg < mid_point_of_page_width] ) - all_found_textline_polygons_marginals_right = \ - list( np.array(all_found_textline_polygons_marginals)[cx_marg >= mid_point_of_page_width] ) + (slopes_marg_left, + slopes_marg_right) = \ + split(slopes_marginals) - all_box_coord_marginals_left = list( np.array(all_box_coord_marginals)[cx_marg < mid_point_of_page_width] ) - all_box_coord_marginals_right = list( np.array(all_box_coord_marginals)[cx_marg >= mid_point_of_page_width] ) + (cy_marg_left, + cy_marg_right) = \ + split(cy_marg) + + order_left = np.argsort(cy_marg_left) + order_right = np.argsort(cy_marg_right) + def sort_left(lis): + return list(np.array(lis)[order_left]) + def sort_right(lis): + return list(np.array(lis)[order_right]) - slopes_marg_left = list( np.array(slopes_marginals)[cx_marg < mid_point_of_page_width] ) - slopes_marg_right = list( np.array(slopes_marginals)[cx_marg >= mid_point_of_page_width] ) + ordered_left_marginals = sort_left(poly_marg_left) + 
ordered_right_marginals = sort_right(poly_marg_right) - cy_marg_left = cy_marg[cx_marg < mid_point_of_page_width] - cy_marg_right = cy_marg[cx_marg >= mid_point_of_page_width] + ordered_left_marginals_textline = sort_left(all_found_textline_polygons_marginals_left) + ordered_right_marginals_textline = sort_right(all_found_textline_polygons_marginals_right) - ordered_left_marginals = [poly for _, poly in sorted(zip(cy_marg_left, poly_marg_left), - key=lambda x: x[0])] - ordered_right_marginals = [poly for _, poly in sorted(zip(cy_marg_right, poly_marg_right), - key=lambda x: x[0])] + ordered_left_marginals_bbox = sort_left(all_box_coord_marginals_left) + ordered_right_marginals_bbox = sort_right(all_box_coord_marginals_right) - ordered_left_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_left, - all_found_textline_polygons_marginals_left), - key=lambda x: x[0])] - ordered_right_marginals_textline = [poly for _, poly in sorted(zip(cy_marg_right, - all_found_textline_polygons_marginals_right), - key=lambda x: x[0])] - - ordered_left_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_left, - all_box_coord_marginals_left), - key=lambda x: x[0])] - ordered_right_marginals_bbox = [poly for _, poly in sorted(zip(cy_marg_right, - all_box_coord_marginals_right), - key=lambda x: x[0])] - - ordered_left_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_left, slopes_marg_left), - key=lambda x: x[0])] - ordered_right_slopes_marginals = [poly for _, poly in sorted(zip(cy_marg_right, slopes_marg_right), - key=lambda x: x[0])] + ordered_left_slopes_marginals = sort_left(slopes_marg_left) + ordered_right_slopes_marginals = sort_right(slopes_marg_right) return (ordered_left_marginals, ordered_right_marginals, @@ -4627,14 +4241,11 @@ class Eynollah: all_found_textline_polygons = filter_contours_area_of_image( textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - M_main_tot = [cv2.moments(all_found_textline_polygons[j]) - for j in range(len(all_found_textline_polygons))] - w_h_textlines = [cv2.boundingRect(all_found_textline_polygons[j])[2:] - for j in range(len(all_found_textline_polygons))] - w_h_textlines = [w_h_textlines[j][0] / float(w_h_textlines[j][1]) for j in range(len(w_h_textlines))] - cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] - + cx_main_tot, cy_main_tot = find_center_of_contours(all_found_textline_polygons) + w_h_textlines = [cv2.boundingRect(polygon)[2:] + for polygon in all_found_textline_polygons] + w_h_textlines = [w / float(h) for w, h in w_h_textlines] + all_found_textline_polygons = self.get_textlines_of_a_textregion_sorted( #all_found_textline_polygons[::-1] all_found_textline_polygons, cx_main_tot, cy_main_tot, w_h_textlines) @@ -4646,39 +4257,25 @@ class Eynollah: order_text_new = [0] slopes =[0] id_of_texts_tot =['region_0001'] - - polygons_of_images = [] - slopes_marginals_left = [] - slopes_marginals_right = [] - polygons_of_marginals_left = [] - polygons_of_marginals_right = [] - all_found_textline_polygons_marginals_left = [] - all_found_textline_polygons_marginals_right = [] - all_box_coord_marginals_left = [] - all_box_coord_marginals_right = [] - polygons_seplines = [] - contours_tables = [] conf_contours_textregions =[0] if self.ocr and not self.tr: gc.collect() ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( - image_page, 
all_found_textline_polygons, self.prediction_model, - self.b_s_ocr, self.num_to_char, textline_light=True) + image_page, all_found_textline_polygons, np.zeros((len(all_found_textline_polygons), 4)), + self.prediction_model, self.b_s_ocr, self.num_to_char, textline_light=True) else: ocr_all_textlines = None pcgts = self.writer.build_pagexml_no_full_layout( cont_page, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, page_coord, polygons_of_images, - polygons_of_marginals_left, polygons_of_marginals_right, - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, - all_box_coord_marginals_left, all_box_coord_marginals_right, - slopes, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_seplines, contours_tables, + all_found_textline_polygons, page_coord, [], + [], [], [], [], [], [], + slopes, [], [], + cont_page, [], [], ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, - skip_layout_reading_order=self.skip_layout_and_reading_order) + skip_layout_reading_order=True) self.logger.info("Basic processing complete") return pcgts @@ -4818,109 +4415,129 @@ class Eynollah: ###min_con_area = 0.000005 contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] if len(contours_only_text_parent) > 0: + areas_tot_text = np.prod(text_only.shape) areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + areas_cnt_text = areas_cnt_text / float(areas_tot_text) #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) - if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + contours_only_text_parent = np.array(contours_only_text_parent)[areas_cnt_text > MIN_AREA_REGION] + areas_cnt_text_parent = areas_cnt_text[areas_cnt_text > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + contours_only_text_parent = contours_only_text_parent[index_con_parents] + areas_cnt_text_parent = areas_cnt_text_parent[index_con_parents] - contours_only_text_parent = self.return_list_of_contours_with_desired_order( - contours_only_text_parent, index_con_parents) + centers = np.stack(find_center_of_contours(contours_only_text_parent)) # [2, N] - ##try: - ##contours_only_text_parent = \ - ##list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = \ - ##list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order( - areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + center0 = centers[:, -1:] # [2, 1] if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) contours_only_text_parent_d = 
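The region pre-filtering above (normalize each contour area by the page area, drop regions below MIN_AREA_REGION, then sort ascending by area so the largest region ends up last) in isolation — a sketch only; the threshold value and helper names are illustrative, not taken from the source:

    import cv2
    import numpy as np

    MIN_AREA_REGION = 0.0005  # illustrative value only; the real constant lives in eynollah

    def filter_and_sort_by_area(contours, page_shape):
        page_area = float(np.prod(page_shape[:2]))
        areas = np.array([cv2.contourArea(c) for c in contours]) / page_area
        keep = np.flatnonzero(areas > MIN_AREA_REGION)
        order = keep[np.argsort(areas[keep])]   # ascending, so the largest region is last
        return [contours[i] for i in order], areas[order]

    def rect(x, y, w, h):
        return np.array([[[x, y]], [[x + w, y]], [[x + w, y + h]], [[x, y + h]]], dtype=np.int32)

    regions, region_areas = filter_and_sort_by_area(
        [rect(0, 0, 5, 5), rect(10, 10, 200, 150), rect(50, 50, 100, 80)], page_shape=(1000, 800))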
return_parent_contours(contours_only_text_d, hir_on_text_d) + areas_tot_text_d = np.prod(text_only_d.shape) areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + areas_cnt_text_d = areas_cnt_text_d / float(areas_tot_text_d) - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + contours_only_text_parent_d = np.array(contours_only_text_parent_d)[areas_cnt_text_d > MIN_AREA_REGION] + areas_cnt_text_d = areas_cnt_text_d[areas_cnt_text_d > MIN_AREA_REGION] + + if len(contours_only_text_parent_d): index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order( - contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = \ - #list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = \ - #list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order( - areas_cnt_text_d, index_con_parents_d) + contours_only_text_parent_d = np.array(contours_only_text_parent_d)[index_con_parents_d] + areas_cnt_text_d = areas_cnt_text_d[index_con_parents_d] - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = \ - find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = \ - find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + - (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) - for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + - (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) - for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + centers_d = np.stack(find_center_of_contours(contours_only_text_parent_d)) # [2, N] - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(str(why)) + center0_d = centers_d[:, -1:].copy() # [2, 1] + # find the largest among the largest 5 deskewed contours + # that is also closest to the largest original contour + last5_centers_d = centers_d[:, -5:] + dists_d = np.linalg.norm(center0 - last5_centers_d, axis=0) + ind_largest = len(contours_only_text_parent_d) - last5_centers_d.shape[1] + np.argmin(dists_d) + center0_d[:, 0] = centers_d[:, ind_largest] + + # order new contours the same way as the undeskewed contours + # (by calculating the offset of the largest contours, respectively, + # of the new and undeskewed image; then for each contour, + # finding the closest new contour, with proximity calculated + # as distance of their centers modulo offset vector) (h, w) = text_only.shape[:2] center = (w // 2.0, h // 2.0) M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - 
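The re-identification step above (among the five largest deskewed contours, pick the one whose center is closest to the center of the largest original contour) as a small self-contained sketch; function and data are made up for illustration:

    import numpy as np

    def closest_among_largest(centers_d, center0, k=5):
        """centers_d: (2, M) deskewed centers, sorted so the largest contour is last;
        center0: (2, 1) center of the largest original contour."""
        lastk = centers_d[:, -k:]
        dists = np.linalg.norm(center0 - lastk, axis=0)
        return centers_d.shape[1] - lastk.shape[1] + int(np.argmin(dists))

    centers_d = np.array([[5., 40., 90., 60.],    # cx of 4 deskewed regions
                          [5., 10., 80., 55.]])   # cy
    center0 = np.array([[62.], [57.]])
    print(closest_among_largest(centers_d, center0))   # 3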
cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big + center0 = np.dot(M_22, center0) # [2, 1] + offset = center0 - center0_d # [2, 1] - contours_only_text_parent_d_ordered = [] + centers = np.dot(M_22, centers) - offset # [2,N] + # add dimension for area (so only contours of similar size will be considered close) + centers = np.append(centers, areas_cnt_text_parent[np.newaxis], axis=0) + centers_d = np.append(centers_d, areas_cnt_text_d[np.newaxis], axis=0) + + dists = np.zeros((len(contours_only_text_parent), len(contours_only_text_parent_d))) for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + - (p[1] - cy_biggest_d[j]) ** 2) - for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - #contours_only_text_parent = [] + dists[i] = np.linalg.norm(centers[:, i:i + 1] - centers_d, axis=0) + corresp = np.zeros(dists.shape, dtype=bool) + # keep searching next-closest until at least one correspondence on each side + while not np.all(corresp.sum(axis=1)) and not np.all(corresp.sum(axis=0)): + idx = np.nanargmin(dists) + i, j = np.unravel_index(idx, dists.shape) + dists[i, j] = np.nan + corresp[i, j] = True + #print("original/deskewed adjacency", corresp.nonzero()) + contours_only_text_parent_d_ordered = np.zeros_like(contours_only_text_parent) + contours_only_text_parent_d_ordered = contours_only_text_parent_d[np.argmax(corresp, axis=1)] + # img1 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) + # for i in range(len(contours_only_text_parent)): + # cv2.fillPoly(img1, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1) + # plt.subplot(2, 2, 1, title="direct corresp contours") + # plt.imshow(img1) + # img2 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) + # join deskewed regions mapping to single original ones + for i in range(len(contours_only_text_parent)): + if np.count_nonzero(corresp[i]) > 1: + indices = np.flatnonzero(corresp[i]) + #print("joining", indices) + polygons_d = [contour2polygon(contour) + for contour in contours_only_text_parent_d[indices]] + contour_d = polygon2contour(join_polygons(polygons_d)) + contours_only_text_parent_d_ordered[i] = contour_d + # cv2.fillPoly(img2, pts=[contour_d], color=i + 1) + # plt.subplot(2, 2, 3, title="joined contours") + # plt.imshow(img2) + # img3 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) + # split deskewed regions mapping to multiple original ones + def deskew(polygon): + polygon = shapely.affinity.rotate(polygon, -slope_deskew, origin=center) + polygon = shapely.affinity.translate(polygon, *offset.squeeze()) + return polygon + for j in range(len(contours_only_text_parent_d)): + if np.count_nonzero(corresp[:, j]) > 1: + indices = np.flatnonzero(corresp[:, j]) + #print("splitting along", indices) + polygons = [deskew(contour2polygon(contour)) + for contour in contours_only_text_parent[indices]] + polygon_d = contour2polygon(contours_only_text_parent_d[j]) + polygons_d = [make_intersection(polygon_d, polygon) + 
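A toy version of the greedy matching loop above: build a distance matrix between (cx, cy, area) feature columns of the original and deskewed regions, then repeatedly take the overall closest remaining pair until one side is fully covered. This sketches only the search itself, not the subsequent joining/splitting; the data below is made up:

    import numpy as np

    def greedy_correspondence(feats, feats_d):
        """feats: (3, N) and feats_d: (3, M) columns of (cx, cy, area) per region."""
        dists = np.linalg.norm(feats[:, :, None] - feats_d[:, None, :], axis=0)  # (N, M)
        corresp = np.zeros(dists.shape, dtype=bool)
        # keep taking the next-closest pair until one side has a match everywhere
        while not np.all(corresp.sum(axis=1)) and not np.all(corresp.sum(axis=0)):
            i, j = np.unravel_index(np.nanargmin(dists), dists.shape)
            dists[i, j] = np.nan
            corresp[i, j] = True
        return corresp

    feats = np.array([[10., 90., 50.],    # cx of 3 original regions
                      [20., 20., 25.],    # cy
                      [4., 9., 6.]])      # area
    feats_d = np.array([[12., 88.],       # 2 deskewed regions
                        [21., 19.],
                        [4., 9.]])
    print(greedy_correspondence(feats, feats_d).nonzero())   # -> (array([0, 1]), array([0, 1]))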
for polygon in polygons] + # ignore where there is no actual overlap + indices = indices[np.flatnonzero(polygons_d)] + contours_d = [polygon2contour(polygon_d) + for polygon_d in polygons_d + if polygon_d] + contours_only_text_parent_d_ordered[indices] = contours_d + # cv2.fillPoly(img3, pts=contours_d, color=j + 1) + # plt.subplot(2, 2, 4, title="split contours") + # plt.imshow(img3) + # img4 = np.zeros(text_only_d.shape[:2], dtype=np.uint8) + # for i in range(len(contours_only_text_parent)): + # cv2.fillPoly(img4, pts=[contours_only_text_parent_d_ordered[i]], color=i + 1) + # plt.subplot(2, 2, 2, title="result contours") + # plt.imshow(img4) + # plt.show() if not len(contours_only_text_parent): # stop early @@ -4946,7 +4563,6 @@ class Eynollah: return pcgts - #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) @@ -4954,85 +4570,75 @@ class Eynollah: contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, marginal_cnts=polygons_of_marginals) #print("text region early 3.5 in %.1fs", time.time() - t0) - txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( + conf_contours_textregions = get_textregion_contours_in_org_image_light( contours_only_text_parent, self.image, confidence_matrix) - #txt_con_org = dilate_textregion_contours(txt_con_org) #contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) else: - txt_con_org , conf_contours_textregions = get_textregion_contours_in_org_image_light( + conf_contours_textregions = get_textregion_contours_in_org_image_light( contours_only_text_parent, self.image, confidence_matrix) #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + boxes_text = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals = get_text_region_boxes_by_given_contours(polygons_of_marginals) #print("text region early 5 in %.1fs", time.time() - t0) ## birdan sora chock chakir if not self.curved_line: if self.light_version: if self.textline_light: - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ - all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light2( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, + all_found_textline_polygons, \ + all_box_coord, slopes = self.get_slopes_and_deskew_new_light2( + contours_only_text_parent, textline_mask_tot_ea_org, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ - all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light2( - polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, + all_found_textline_polygons_marginals, \ + all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light2( + polygons_of_marginals, textline_mask_tot_ea_org, boxes_marginals, slope_deskew) - #slopes, all_found_textline_polygons, boxes_text, txt_con_org, \ - # contours_only_text_parent, index_by_text_par_con = \ - # self.delete_regions_without_textlines(slopes, all_found_textline_polygons, - # boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) - #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, \ - # polygons_of_marginals, 
polygons_of_marginals, _ = \ - # self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, - # boxes_marginals, polygons_of_marginals, polygons_of_marginals, - # np.array(range(len(polygons_of_marginals)))) all_found_textline_polygons = dilate_textline_contours( all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( all_found_textline_polygons, None, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = dilate_textline_contours( all_found_textline_polygons_marginals) - contours_only_text_parent, txt_con_org, conf_contours_textregions, \ - all_found_textline_polygons, contours_only_text_parent_d_ordered, \ - index_by_text_par_con = self.filter_contours_without_textline_inside( - contours_only_text_parent, txt_con_org, all_found_textline_polygons, + contours_only_text_parent, all_found_textline_polygons, \ + contours_only_text_parent_d_ordered, conf_contours_textregions = \ + self.filter_contours_without_textline_inside( + contours_only_text_parent, all_found_textline_polygons, contours_only_text_parent_d_ordered, conf_contours_textregions) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, \ - index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_light( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea, + all_found_textline_polygons, \ + all_box_coord, slopes = self.get_slopes_and_deskew_new_light( + contours_only_text_parent, contours_only_text_parent, textline_mask_tot_ea, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ - all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_light( + all_found_textline_polygons_marginals, \ + all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, boxes_marginals, slope_deskew) #all_found_textline_polygons = self.filter_contours_inside_a_bigger_one( # all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ - all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea, + all_found_textline_polygons, \ + all_box_coord, slopes = self.get_slopes_and_deskew_new( + contours_only_text_parent, contours_only_text_parent, textline_mask_tot_ea, boxes_text, slope_deskew) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ - all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new( + all_found_textline_polygons_marginals, \ + all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new( polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, boxes_marginals, slope_deskew) else: scale_param = 1 textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2) - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, \ - all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved( - txt_con_org, contours_only_text_parent, textline_mask_tot_ea_erode, + all_found_textline_polygons, \ + 
all_box_coord, slopes = self.get_slopes_and_deskew_new_curved( + contours_only_text_parent, textline_mask_tot_ea_erode, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2( all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, \ - all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved( - polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_erode, + all_found_textline_polygons_marginals, \ + all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_curved( + polygons_of_marginals, textline_mask_tot_ea_erode, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2( @@ -5048,19 +4654,8 @@ class Eynollah: slopes_marginals, mid_point_of_page_width) #print(len(polygons_of_marginals), len(ordered_left_marginals), len(ordered_right_marginals), 'marginals ordred') + if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( - contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: - #takes long timee - contours_only_text_parent_d_ordered = None if self.light_version: fun = check_any_text_region_in_model_one_is_main_or_header_light else: @@ -5078,7 +4673,8 @@ class Eynollah: self.plotter.save_plot_of_layout_all(text_regions_p, image_page) label_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, label_img) + polygons_of_drop_capitals = return_contours_of_interested_region(text_regions_p, label_img, + min_area=0.00003) ##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline( ##text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, ##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, @@ -5089,21 +4685,17 @@ class Eynollah: if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, label_seps, contours_only_text_parent_h) + text_regions_p, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h) else: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered) + text_regions_p_1_n, num_col_classifier, self.tables, label_seps, contours_only_text_parent_h_d_ordered) elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( - np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, label_seps) + text_regions_p, num_col_classifier, self.tables, label_seps) else: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = 
find_number_of_columns_in_document( - np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), - num_col_classifier, self.tables, label_seps) + text_regions_p_1_n, num_col_classifier, self.tables, label_seps) if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -5123,87 +4715,127 @@ class Eynollah: splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left, logger=self.logger) + else: + contours_only_text_parent_h = [] + contours_only_text_parent_h_d_ordered = [] if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() - if self.full_layout: - self.logger.info("Step 4/5: Reading Order Detection") - - if self.reading_order_machine_based: - self.logger.info("Using machine-based detection") - if self.right2left: - self.logger.info("Right-to-left mode enabled") - if self.headers_off: - self.logger.info("Headers ignored in reading order") + self.logger.info("Step 4/5: Reading Order Detection") - if self.reading_order_machine_based: - tror = time.time() - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( - contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + if self.reading_order_machine_based: + self.logger.info("Using machine-based detection") + if self.right2left: + self.logger.info("Right-to-left mode enabled") + if self.headers_off: + self.logger.info("Headers ignored in reading order") + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( + contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + else: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, - boxes_d, textline_mask_tot_d) - self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s") + order_text_new, id_of_texts_tot = self.do_order_of_regions( + contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, + boxes_d, textline_mask_tot_d) + self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s") - if self.ocr and not self.tr: - self.logger.info("Step 4.5/5: OCR Processing") - - if torch.cuda.is_available(): - self.logger.info("Using GPU acceleration") - else: - self.logger.info("Using CPU processing") - + ocr_all_textlines = None + ocr_all_textlines_marginals_left = None + ocr_all_textlines_marginals_right = None + ocr_all_textlines_h = None + ocr_all_textlines_drop = None + if self.ocr: + self.logger.info("Step 4.5/5: OCR Processing") + + if not self.tr: gc.collect() - if len(all_found_textline_polygons)>0: + + if len(all_found_textline_polygons): ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines = None + image_page, all_found_textline_polygons, all_box_coord, + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, 
self.curved_line) - if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: + if len(all_found_textline_polygons_marginals_left): ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons_marginals_left, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines_marginals_left = None + image_page, all_found_textline_polygons_marginals_left, all_box_coord_marginals_left, + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: + if len(all_found_textline_polygons_marginals_right): ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons_marginals_right, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines_marginals_right = None + image_page, all_found_textline_polygons_marginals_right, all_box_coord_marginals_right, + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - if all_found_textline_polygons_h and len(all_found_textline_polygons)>0: + if self.full_layout and len(all_found_textline_polygons): ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons_h, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines_h = None + image_page, all_found_textline_polygons_h, all_box_coord_h, + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - if polygons_of_drop_capitals and len(polygons_of_drop_capitals)>0: + if self.full_layout and len(polygons_of_drop_capitals): ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines( - image_page, polygons_of_drop_capitals, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - else: - ocr_all_textlines_drop = None + image_page, polygons_of_drop_capitals, np.zeros((len(polygons_of_drop_capitals), 4)), + self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) + else: - ocr_all_textlines = None - ocr_all_textlines_marginals_left = None - ocr_all_textlines_marginals_right = None - ocr_all_textlines_h = None - ocr_all_textlines_drop = None + if self.light_version: + self.logger.info("Using light version OCR") + if self.textline_light: + self.logger.info("Using light text line detection for OCR") + self.logger.info("Processing text lines...") + + gc.collect() + + torch.cuda.empty_cache() + self.model_ocr.to(self.device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + ocr_all_textlines = [] + # FIXME: what about lines in marginals / headings / drop-capitals here? 
+ for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) + text_ocr = self.return_ocr_of_textline_without_common_section( + img_croped, self.model_ocr, self.processor, self.device, w, h2w_ratio, ind_tot) + ocr_textline_in_textregion.append(text_ocr) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) - self.logger.info("Step 5/5: Output Generation") - + self.logger.info("Step 5/5: Output Generation") + + if self.full_layout: pcgts = self.writer.build_pagexml_full_layout( contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, @@ -5216,135 +4848,20 @@ class Eynollah: ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h) - - return pcgts - - contours_only_text_parent_h = None - self.logger.info("Step 4/5: Reading Order Detection") - - if self.reading_order_machine_based: - self.logger.info("Using machine-based detection") - if self.right2left: - self.logger.info("Right-to-left mode enabled") - if self.headers_off: - self.logger.info("Headers ignored in reading order") - - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model( - contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) - else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order( - contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = \ - #list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions( - contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - - if self.ocr and self.tr: - self.logger.info("Step 4.5/5: OCR Processing") - if torch.cuda.is_available(): - 
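The per-textline preprocessing in the loop above — fill the line polygon as a mask, whiten everything outside it, then crop the bounding rectangle and keep the height-to-width ratio — as a simplified sketch (2-D mask instead of the 3-channel one used above; names and data are illustrative):

    import cv2
    import numpy as np

    def crop_textline(page_bgr, polygon):
        """Whiten everything outside the textline polygon, then crop its bounding box."""
        polygon = polygon.copy()
        polygon[polygon < 0] = 0
        x, y, w, h = cv2.boundingRect(polygon)
        mask = np.zeros(page_bgr.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, pts=[polygon], color=1)
        out = page_bgr.copy()
        out[mask == 0] = 255                   # background to white
        return out[y:y + h, x:x + w], h / float(w)

    page = np.full((100, 200, 3), 128, dtype=np.uint8)
    line = np.array([[[20, 30]], [[120, 30]], [[120, 55]], [[20, 55]]], dtype=np.int32)
    crop, h2w_ratio = crop_textline(page, line)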
self.logger.info("Using GPU acceleration") - else: - self.logger.info("Using CPU processing") - if self.light_version: - self.logger.info("Using light version OCR") - if self.textline_light: - self.logger.info("Using light text line detection for OCR") - self.logger.info("Processing text lines...") + pcgts = self.writer.build_pagexml_no_full_layout( + contours_only_text_parent, page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, all_box_coord, polygons_of_images, + polygons_of_marginals_left, polygons_of_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, contours_tables, + ocr_all_textlines=ocr_all_textlines, + ocr_all_textlines_marginals_left=ocr_all_textlines_marginals_left, + ocr_all_textlines_marginals_right=ocr_all_textlines_marginals_right, + conf_contours_textregions=conf_contours_textregions) - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 - - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - #cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped) - text_ocr = self.return_ocr_of_textline_without_common_section( - img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) - ocr_textline_in_textregion.append(text_ocr) - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - - elif self.ocr and not self.tr: - gc.collect() - if len(all_found_textline_polygons)>0: - ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - - if all_found_textline_polygons_marginals_left and len(all_found_textline_polygons_marginals_left)>0: - ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons_marginals_left, self.prediction_model, - self.b_s_ocr, 
self.num_to_char, self.textline_light, self.curved_line) - - if all_found_textline_polygons_marginals_right and len(all_found_textline_polygons_marginals_right)>0: - ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines( - image_page, all_found_textline_polygons_marginals_right, self.prediction_model, - self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line) - - else: - ocr_all_textlines = None - ocr_all_textlines_marginals_left = None - ocr_all_textlines_marginals_right = None - self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s") - - self.logger.info("Step 5/5: Output Generation") - self.logger.info("Generating PAGE-XML output") - - pcgts = self.writer.build_pagexml_no_full_layout( - txt_con_org, page_coord, order_text_new, id_of_texts_tot, - all_found_textline_polygons, all_box_coord, polygons_of_images, - polygons_of_marginals_left, polygons_of_marginals_right, - all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, - all_box_coord_marginals_left, all_box_coord_marginals_right, - slopes, slopes_marginals_left, slopes_marginals_right, - cont_page, polygons_seplines, contours_tables, ocr_all_textlines, - ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, - conf_contours_textregions) - return pcgts diff --git a/src/eynollah/image_enhancer.py b/src/eynollah/image_enhancer.py index 89dde16..9247efe 100644 --- a/src/eynollah/image_enhancer.py +++ b/src/eynollah/image_enhancer.py @@ -6,23 +6,23 @@ from logging import Logger import os import time from typing import Optional -import atexit -from functools import partial from pathlib import Path -from multiprocessing import cpu_count import gc + import cv2 import numpy as np from ocrd_utils import getLogger, tf_disable_interactive_logs import tensorflow as tf from skimage.morphology import skeletonize from tensorflow.keras.models import load_model + from .utils.resize import resize_image from .utils.pil_cv2 import pil2cv from .utils import ( is_image_filename, crop_image_inside_box ) +from .eynollah import PatchEncoder, Patches DPI_THRESHOLD = 298 KERNEL = np.ones((5, 5), np.uint8) diff --git a/src/eynollah/mb_ro_on_layout.py b/src/eynollah/mb_ro_on_layout.py index 45db8e4..218f973 100644 --- a/src/eynollah/mb_ro_on_layout.py +++ b/src/eynollah/mb_ro_on_layout.py @@ -6,25 +6,24 @@ from logging import Logger import os import time from typing import Optional -import atexit -from functools import partial from pathlib import Path -from multiprocessing import cpu_count import xml.etree.ElementTree as ET + import cv2 import numpy as np from ocrd_utils import getLogger import statistics import tensorflow as tf from tensorflow.keras.models import load_model -from .utils.resize import resize_image +from .utils.resize import resize_image from .utils.contour import ( find_new_features_of_contours, return_contours_of_image, return_parent_contours, ) from .utils import is_xml_filename +from .eynollah import PatchEncoder, Patches DPI_THRESHOLD = 298 KERNEL = np.ones((5, 5), np.uint8) diff --git a/src/eynollah/utils/__init__.py b/src/eynollah/utils/__init__.py index 52bf3ef..5ccb2af 100644 --- a/src/eynollah/utils/__init__.py +++ b/src/eynollah/utils/__init__.py @@ -15,10 +15,21 @@ from scipy.ndimage import gaussian_filter1d from .is_nan import isNaN from .contour import (contours_in_same_horizon, + find_center_of_contours, find_new_features_of_contours, return_contours_of_image, return_parent_contours) +def pairwise(iterable): + # 
pairwise('ABCDEFG') → AB BC CD DE EF FG + + iterator = iter(iterable) + a = next(iterator, None) + + for b in iterator: + yield a, b + a = b + def return_x_start_end_mothers_childs_and_type_of_reading_order( x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): @@ -785,7 +796,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): return len(peaks_fin_true), peaks_fin_true def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): - regions_without_separators_0 = regions_without_separators[:, :, 0].sum(axis=0) + regions_without_separators_0 = regions_without_separators.sum(axis=0) ##plt.plot(regions_without_separators_0) ##plt.show() @@ -812,7 +823,10 @@ def return_regions_without_separators(regions_pre): return regions_without_separators def put_drop_out_from_only_drop_model(layout_no_patch, layout1): - drop_only = (layout_no_patch[:, :, 0] == 4) * 1 + if layout_no_patch.ndim == 3: + layout_no_patch = layout_no_patch[:, :, 0] + + drop_only = (layout_no_patch[:, :] == 4) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -838,9 +852,8 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): (map_of_drop_contour_bb == 5).sum()) >= 15: contours_drop_parent_final.append(contours_drop_parent[jj]) - layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0 - - layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4)) + layout_no_patch[:, :][layout_no_patch[:, :] == 4] = 0 + layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=4) return layout_no_patch @@ -914,29 +927,28 @@ def check_any_text_region_in_model_one_is_main_or_header( contours_only_text_parent_main_d=[] contours_only_text_parent_head_d=[] - for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] - img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) - img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) + for ii, con in enumerate(contours_only_text_parent): + img = np.zeros(regions_model_1.shape[:2]) + img = cv2.fillPoly(img, pts=[con], color=255) - all_pixels=((img[:,:,0]==255)*1).sum() - pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() + all_pixels=((img == 255)*1).sum() + pixels_header=( ( (img == 255) & (regions_model_full[:,:,0]==2) )*1 ).sum() pixels_main=all_pixels-pixels_header if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ]=2 contours_only_text_parent_head.append(con) - if contours_only_text_parent_d_ordered is not None: + if len(contours_only_text_parent_d_ordered): contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_head.append(all_box_coord[ii]) slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) conf_contours_head.append(None) else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ]=1 contours_only_text_parent_main.append(con) conf_contours_main.append(conf_contours[ii]) - if contours_only_text_parent_d_ordered is not None: + if len(contours_only_text_parent_d_ordered): contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) 
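The new pairwise helper above mirrors the standard itertools recipe (equivalent to itertools.pairwise on Python 3.10+); it is used further down to walk consecutive band boundaries, for example:

    from itertools import pairwise   # stdlib equivalent of the helper above (Python 3.10+)
    import numpy as np

    boundaries = np.array([0, 120, 260, 400])   # sorted separator rows (illustrative)
    bands = list(pairwise(boundaries))          # -> (0, 120), (120, 260), (260, 400)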
all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) @@ -1004,11 +1016,11 @@ def check_any_text_region_in_model_one_is_main_or_header_light( contours_only_text_parent_head_d=[] for ii, con in enumerate(contours_only_text_parent_z): - img=np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) - img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) + img = np.zeros(regions_model_1.shape[:2]) + img = cv2.fillPoly(img, pts=[con], color=255) - all_pixels = (img[:,:,0]==255).sum() - pixels_header=((img[:,:,0]==255) & + all_pixels = (img == 255).sum() + pixels_header=((img == 255) & (regions_model_full[:,:,0]==2)).sum() pixels_main = all_pixels - pixels_header @@ -1018,20 +1030,20 @@ def check_any_text_region_in_model_one_is_main_or_header_light( ( pixels_header / float(pixels_main) >= 0.3 and length_con[ii] / float(height_con[ii]) >=3 )): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 2 + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 2 contours_only_text_parent_head.append(contours_only_text_parent[ii]) conf_contours_head.append(None) # why not conf_contours[ii], too? - if contours_only_text_parent_d_ordered is not None: + if len(contours_only_text_parent_d_ordered): contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_head.append(all_box_coord[ii]) slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ] = 1 + regions_model_1[:,:][(regions_model_1[:,:]==1) & (img == 255) ] = 1 contours_only_text_parent_main.append(contours_only_text_parent[ii]) conf_contours_main.append(conf_contours[ii]) - if contours_only_text_parent_d_ordered is not None: + if len(contours_only_text_parent_d_ordered): contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) all_box_coord_main.append(all_box_coord[ii]) slopes_main.append(slopes[ii]) @@ -1108,11 +1120,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) textlines_big.append(textlines_tot[i]) textlines_big_org_form.append(textlines_tot_org_form[i]) - img_textline_s = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) - img_textline_s = cv2.fillPoly(img_textline_s, pts=textlines_small, color=(1, 1, 1)) + img_textline_s = np.zeros(textline_iamge.shape[:2]) + img_textline_s = cv2.fillPoly(img_textline_s, pts=textlines_small, color=1) - img_textline_b = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) - img_textline_b = cv2.fillPoly(img_textline_b, pts=textlines_big, color=(1, 1, 1)) + img_textline_b = np.zeros(textline_iamge.shape[:2]) + img_textline_b = cv2.fillPoly(img_textline_b, pts=textlines_big, color=1) sum_small_big_all = img_textline_s + img_textline_b sum_small_big_all2 = (sum_small_big_all[:, :] == 2) * 1 @@ -1124,11 +1136,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) # print(len(textlines_small),'small') intersections = [] for z2 in range(len(textlines_big)): - img_text = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) - img_text = cv2.fillPoly(img_text, pts=[textlines_small[z1]], color=(1, 1, 1)) + img_text = np.zeros(textline_iamge.shape[:2]) + img_text = cv2.fillPoly(img_text, pts=[textlines_small[z1]], color=1) - img_text2 = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1])) - img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z2]], color=(1, 1, 1)) + 
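The fill-and-sum overlap test used above (paint each polygon into a binary image and count pixels where the sum equals 2) reduced to a minimal sketch with two toy rectangles; the helper name is made up:

    import cv2
    import numpy as np

    def overlap_pixels(shape, poly_a, poly_b):
        img_a = np.zeros(shape[:2])
        cv2.fillPoly(img_a, pts=[poly_a], color=1)
        img_b = np.zeros(shape[:2])
        cv2.fillPoly(img_b, pts=[poly_b], color=1)
        return int(((img_a + img_b) == 2).sum())

    a = np.array([[[10, 10]], [[60, 10]], [[60, 40]], [[10, 40]]], dtype=np.int32)
    b = np.array([[[40, 20]], [[90, 20]], [[90, 70]], [[40, 70]]], dtype=np.int32)
    print(overlap_pixels((100, 100), a, b))   # pixel area shared by the two rectangles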
img_text2 = np.zeros(textline_iamge.shape[:2]) + img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z2]], color=1) sum_small_big = img_text2 + img_text sum_small_big_2 = (sum_small_big[:, :] == 2) * 1 @@ -1154,19 +1166,17 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) index_small_textlines = list(np.where(np.array(dis_small_from_bigs_tot) == z)[0]) # print(z,index_small_textlines) - img_text2 = np.zeros((textline_iamge.shape[0], textline_iamge.shape[1], 3)) - img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z]], color=(255, 255, 255)) + img_text2 = np.zeros(textline_iamge.shape[:2], dtype=np.uint8) + img_text2 = cv2.fillPoly(img_text2, pts=[textlines_big[z]], color=255) textlines_big_with_change.append(z) for k in index_small_textlines: - img_text2 = cv2.fillPoly(img_text2, pts=[textlines_small[k]], color=(255, 255, 255)) + img_text2 = cv2.fillPoly(img_text2, pts=[textlines_small[k]], color=255) textlines_small_with_change.append(k) - img_text2 = img_text2.astype(np.uint8) - imgray = cv2.cvtColor(img_text2, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - cont, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(img_text2, 0, 255, 0) + cont, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # print(cont[0],type(cont)) textlines_big_with_change_con.append(cont) @@ -1178,111 +1188,51 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) # print(textlines_big_with_change,'textlines_big_with_change') # print(textlines_small_with_change,'textlines_small_with_change') # print(textlines_big) - textlines_con_changed.append(textlines_big_org_form) - else: - textlines_con_changed.append(textlines_big_org_form) + + textlines_con_changed.append(textlines_big_org_form) return textlines_con_changed -def order_of_regions(textline_mask, contours_main, contours_header, y_ref): +def order_of_regions(textline_mask, contours_main, contours_head, y_ref): ##plt.imshow(textline_mask) ##plt.show() - """ - print(len(contours_main),'contours_main') - mada_n=textline_mask.sum(axis=1) - y=mada_n[:] - - y_help=np.zeros(len(y)+40) - y_help[20:len(y)+20]=y - x=np.arange(len(y)) - - peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) - ##plt.imshow(textline_mask[:,:]) - ##plt.show() - - sigma_gaus=8 - z= gaussian_filter1d(y_help, sigma_gaus) - zneg_rev=-y_help+np.max(y_help) - zneg=np.zeros(len(zneg_rev)+40) - zneg[20:len(zneg_rev)+20]=zneg_rev - zneg= gaussian_filter1d(zneg, sigma_gaus) - - peaks, _ = find_peaks(z, height=0) - peaks_neg, _ = find_peaks(zneg, height=0) - peaks_neg=peaks_neg-20-20 - peaks=peaks-20 - """ - textline_sum_along_width = textline_mask.sum(axis=1) - - y = textline_sum_along_width[:] + y = textline_mask.sum(axis=1) # horizontal projection profile y_padded = np.zeros(len(y) + 40) y_padded[20 : len(y) + 20] = y - x = np.arange(len(y)) - - peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) sigma_gaus = 8 - z = gaussian_filter1d(y_padded, sigma_gaus) - zneg_rev = -y_padded + np.max(y_padded) + #z = gaussian_filter1d(y_padded, sigma_gaus) + #peaks, _ = find_peaks(z, height=0) + #peaks = peaks - 20 + zneg_rev = np.max(y_padded) - y_padded zneg = np.zeros(len(zneg_rev) + 40) zneg[20 : len(zneg_rev) + 20] = zneg_rev zneg = gaussian_filter1d(zneg, sigma_gaus) - peaks, _ = find_peaks(z, height=0) peaks_neg, _ = find_peaks(zneg, height=0) peaks_neg = peaks_neg - 20 - 20 - peaks = peaks - 20 
##plt.plot(z) ##plt.show() - if contours_main != None: - areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) - x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) + cx_main, cy_main = find_center_of_contours(contours_main) + cx_head, cy_head = find_center_of_contours(contours_head) - y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) + peaks_neg_new = np.append(np.insert(peaks_neg, 0, 0), textline_mask.shape[0]) + # offset from bbox of mask + peaks_neg_new += y_ref - if len(contours_header) != None: - areas_header = np.array([cv2.contourArea(contours_header[j]) for j in range(len(contours_header))]) - M_header = [cv2.moments(contours_header[j]) for j in range(len(contours_header))] - cx_header = [(M_header[j]["m10"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))] - cy_header = [(M_header[j]["m01"] / (M_header[j]["m00"] + 1e-32)) for j in range(len(M_header))] + # assert not len(cy_main) or np.min(peaks_neg_new) <= np.min(cy_main) and np.max(cy_main) <= np.max(peaks_neg_new) + # assert not len(cy_head) or np.min(peaks_neg_new) <= np.min(cy_head) and np.max(cy_head) <= np.max(peaks_neg_new) - x_min_header = np.array([np.min(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) - x_max_header = np.array([np.max(contours_header[j][:, 0, 0]) for j in range(len(contours_header))]) - - y_min_header = np.array([np.min(contours_header[j][:, 0, 1]) for j in range(len(contours_header))]) - y_max_header = np.array([np.max(contours_header[j][:, 0, 1]) for j in range(len(contours_header))]) - # print(cy_main,'mainy') - - peaks_neg_new = [] - peaks_neg_new.append(0 + y_ref) - for iii in range(len(peaks_neg)): - peaks_neg_new.append(peaks_neg[iii] + y_ref) - peaks_neg_new.append(textline_mask.shape[0] + y_ref) - - if len(cy_main) > 0 and np.max(cy_main) > np.max(peaks_neg_new): - cy_main = np.array(cy_main) * (np.max(peaks_neg_new) / np.max(cy_main)) - 10 - if contours_main != None: - indexer_main = np.arange(len(contours_main)) - if contours_main != None: - len_main = len(contours_main) - else: - len_main = 0 - - matrix_of_orders = np.zeros((len(contours_main) + len(contours_header), 5)) - matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_header)) + matrix_of_orders = np.zeros((len(contours_main) + len(contours_head), 5), dtype=int) + matrix_of_orders[:, 0] = np.arange(len(contours_main) + len(contours_head)) matrix_of_orders[: len(contours_main), 1] = 1 matrix_of_orders[len(contours_main) :, 1] = 2 matrix_of_orders[: len(contours_main), 2] = cx_main - matrix_of_orders[len(contours_main) :, 2] = cx_header + matrix_of_orders[len(contours_main) :, 2] = cx_head matrix_of_orders[: len(contours_main), 3] = cy_main - matrix_of_orders[len(contours_main) :, 3] = cy_header + matrix_of_orders[len(contours_main) :, 3] = cy_head matrix_of_orders[: len(contours_main), 4] = np.arange(len(contours_main)) - matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_header)) + 
matrix_of_orders[len(contours_main) :, 4] = np.arange(len(contours_head)) # print(peaks_neg_new,'peaks_neg_new') # print(matrix_of_orders,'matrix_of_orders') @@ -1290,70 +1240,42 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): final_indexers_sorted = [] final_types = [] final_index_type = [] - for i in range(len(peaks_neg_new) - 1): - top = peaks_neg_new[i] - down = peaks_neg_new[i + 1] - indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & - ((matrix_of_orders[:, 3] < down))] - cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & - ((matrix_of_orders[:, 3] < down))] - cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & - ((matrix_of_orders[:, 3] < down))] - types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] - index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & - (matrix_of_orders[:, 3] < down)] + for top, bot in pairwise(peaks_neg_new): + indexes_in, types_in, cxs_in, cys_in, typed_indexes_in = \ + matrix_of_orders[(matrix_of_orders[:, 3] >= top) & + (matrix_of_orders[:, 3] < bot)].T sorted_inside = np.argsort(cxs_in) - ind_in_int = indexes_in[sorted_inside] - ind_in_type = types_of_text[sorted_inside] - ind_ind_type = index_types_of_text[sorted_inside] - for j in range(len(ind_in_int)): - final_indexers_sorted.append(int(ind_in_int[j])) - final_types.append(int(ind_in_type[j])) - final_index_type.append(int(ind_ind_type[j])) + final_indexers_sorted.extend(indexes_in[sorted_inside]) + final_types.extend(types_in[sorted_inside]) + final_index_type.extend(typed_indexes_in[sorted_inside]) ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:] - # This fix is applied if the sum of the lengths of contours and contours_h - # does not match final_indexers_sorted. However, this is not the optimal solution.. 
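order_of_regions above derives its reading bands from the horizontal projection profile of the textline mask: invert the padded profile, smooth it, and take its peaks as the gaps between lines, then sort region centers band by band. A self-contained sketch of the peak-finding part, with the same padding and offsets as the code above and made-up data:

    import numpy as np
    from scipy.ndimage import gaussian_filter1d
    from scipy.signal import find_peaks

    def band_separators(textline_mask, sigma=8):
        y = textline_mask.sum(axis=1)             # horizontal projection profile
        y_padded = np.zeros(len(y) + 40)
        y_padded[20:len(y) + 20] = y
        zneg_rev = np.max(y_padded) - y_padded    # invert: gaps become peaks
        zneg = np.zeros(len(zneg_rev) + 40)
        zneg[20:len(zneg_rev) + 20] = zneg_rev
        zneg = gaussian_filter1d(zneg, sigma)
        peaks_neg, _ = find_peaks(zneg, height=0)
        return peaks_neg - 40                     # undo both paddings

    mask = np.zeros((120, 50), dtype=np.uint8)
    mask[10:30] = 1
    mask[60:80] = 1                               # two synthetic "text lines"
    print(band_separators(mask))                  # gap rows before, between, and after the lines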
- if len(cy_main) + len(cy_header) == len(final_index_type): - pass - else: - indexes_missed = set(np.arange(len(cy_main) + len(cy_header))) - set(final_indexers_sorted) - for ind_missed in indexes_missed: - final_indexers_sorted.append(ind_missed) - final_types.append(1) - final_index_type.append(ind_missed) + # assert len(final_indexers_sorted) == len(contours_main) + len(contours_head) + # assert not len(final_indexers_sorted) or max(final_index_type) == max(len(contours_main) - return final_indexers_sorted, matrix_of_orders, final_types, final_index_type + return np.array(final_indexers_sorted), np.array(final_types), np.array(final_index_type) def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( img_p_in_ver, img_in_hor,num_col_classifier): #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2) - img_p_in_ver=img_p_in_ver.astype(np.uint8) - img_p_in_ver=np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(img_p_in_ver, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_lines_ver,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(img_p_in_ver, 0, 255, 0) + contours_lines_ver, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines_ver, _, x_min_main_ver, _, _, _, y_min_main_ver, y_max_main_ver, cx_main_ver = \ find_features_of_lines(contours_lines_ver) for i in range(len(x_min_main_ver)): img_p_in_ver[int(y_min_main_ver[i]): int(y_min_main_ver[i])+30, int(cx_main_ver[i])-25: - int(cx_main_ver[i])+25, 0] = 0 + int(cx_main_ver[i])+25] = 0 img_p_in_ver[int(y_max_main_ver[i])-30: int(y_max_main_ver[i]), int(cx_main_ver[i])-25: - int(cx_main_ver[i])+25, 0] = 0 + int(cx_main_ver[i])+25] = 0 - img_in_hor=img_in_hor.astype(np.uint8) - img_in_hor=np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(img_in_hor, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_lines_hor,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(img_in_hor, 0, 255, 0) + contours_lines_hor, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, _, _, _, _ = \ find_features_of_lines(contours_lines_hor) @@ -1409,22 +1331,19 @@ def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new( img_p_in=img_in_hor special_separators=[] - img_p_in_ver[:,:,0][img_p_in_ver[:,:,0]==255]=1 - sep_ver_hor=img_p_in+img_p_in_ver - sep_ver_hor_cross=(sep_ver_hor[:,:,0]==2)*1 - sep_ver_hor_cross=np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2) - sep_ver_hor_cross=sep_ver_hor_cross.astype(np.uint8) - imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross) - for ii in range(len(cx_cross)): - img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0 - img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0 + img_p_in_ver[img_p_in_ver == 255] = 1 + sep_ver_hor = img_p_in + img_p_in_ver + sep_ver_hor_cross = (sep_ver_hor == 2) * 1 + _, thresh = cv2.threshold(sep_ver_hor_cross.astype(np.uint8), 0, 255, 0) + contours_cross, _ = 
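Several hunks above replace the old pattern of repeating a mask to 3 channels and converting back to gray before contour extraction with a direct single-channel call; in isolation the new pattern looks like this (toy mask, illustrative values):

    import cv2
    import numpy as np

    mask = np.zeros((80, 80), dtype=np.uint8)
    mask[20:60, 30:50] = 1                        # some binary foreground

    # old: np.repeat(mask[:, :, np.newaxis], 3, axis=2) + cvtColor(BGR2GRAY) + threshold
    _, thresh = cv2.threshold(mask, 0, 255, 0)
    contours, _ = cv2.findContours(thresh.astype(np.uint8),
                                   cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    print(len(contours))                          # 1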
cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + center_cross = np.array(find_center_of_contours(contours_cross), dtype=int) + for cx, cy in center_cross.T: + img_p_in[cy - 30: cy + 30, cx + 5: cx + 40] = 0 + img_p_in[cy - 30: cy + 30, cx - 40: cx - 4] = 0 else: img_p_in=np.copy(img_in_hor) special_separators=[] - return img_p_in[:,:,0], special_separators + return img_p_in, special_separators def return_points_with_boundies(peaks_neg_fin, first_point, last_point): peaks_neg_tot = [] @@ -1434,11 +1353,11 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point): peaks_neg_tot.append(last_point) return peaks_neg_tot -def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): +def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, label_lines, contours_h=None): t_ins_c0 = time.time() - separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1 - separators_closeup[0:110,:,:]=0 - separators_closeup[separators_closeup.shape[0]-150:,:,:]=0 + separators_closeup=( (region_pre_p[:,:]==label_lines))*1 + separators_closeup[0:110,:]=0 + separators_closeup[separators_closeup.shape[0]-150:,:]=0 kernel = np.ones((5,5),np.uint8) separators_closeup=separators_closeup.astype(np.uint8) @@ -1450,15 +1369,11 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, separators_closeup_n=separators_closeup_n.astype(np.uint8) separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) - separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] + separators_closeup_n_binary[:,:]=separators_closeup_n[:,:] separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 - gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) - gray_early=gray_early.astype(np.uint8) - imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY) - ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0) - - contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh_e = cv2.threshold(separators_closeup_n_binary, 0, 255, 0) + contours_line_e, _ = cv2.findContours(thresh_e.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) _, dist_xe, _, _, _, _, y_min_main, y_max_main, _ = \ find_features_of_lines(contours_line_e) dist_ye = y_max_main - y_min_main @@ -1468,10 +1383,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, cnts_hor_e=[] for ce in args_hor_e: cnts_hor_e.append(contours_line_e[ce]) - figs_e=np.zeros(thresh_e.shape) - figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1)) - separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=(0,0,0)) + separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=0) gray = cv2.bitwise_not(separators_closeup_n_binary) gray=gray.astype(np.uint8) @@ -1491,7 +1404,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, kernel = np.ones((5,5),np.uint8) horizontal = cv2.dilate(horizontal,kernel,iterations = 2) horizontal = cv2.erode(horizontal,kernel,iterations = 2) - horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=(255,255,255)) + horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=255) rows = vertical.shape[0] verticalsize = rows // 30 @@ -1509,13 +1422,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, 
separators_closeup_new[:,:][vertical[:,:]!=0]=1 separators_closeup_new[:,:][horizontal[:,:]!=0]=1 - vertical=np.repeat(vertical[:, :, np.newaxis], 3, axis=2) - vertical=vertical.astype(np.uint8) - - imgray = cv2.cvtColor(vertical, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(vertical, 0, 255, 0) + contours_line_vers, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ find_features_of_lines(contours_line_vers) @@ -1530,11 +1438,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, dist_y_ver=y_max_main_ver-y_min_main_ver len_y=separators_closeup.shape[0]/3.0 - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) - horizontal=horizontal.astype(np.uint8) - imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_line_hors,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(horizontal, 0, 255, 0) + contours_line_hors, _ = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = \ find_features_of_lines(contours_line_hors) @@ -1627,7 +1532,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, peaks_neg_fin_fin=[] for itiles in args_big_parts: regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]): - int(splitter_y_new[itiles+1]),:,0] + int(splitter_y_new[itiles+1]),:] try: num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0) diff --git a/src/eynollah/utils/contour.py b/src/eynollah/utils/contour.py index 0700ed4..f998c4d 100644 --- a/src/eynollah/utils/contour.py +++ b/src/eynollah/utils/contour.py @@ -36,14 +36,8 @@ def find_contours_mean_y_diff(contours_main): return np.mean(np.diff(np.sort(np.array(cy_main)))) def get_text_region_boxes_by_given_contours(contours): - boxes = [] - contours_new = [] - for jj in range(len(contours)): - box = cv2.boundingRect(contours[jj]) - boxes.append(box) - contours_new.append(contours[jj]) - - return boxes, contours_new + return [cv2.boundingRect(contour) + for contour in contours] def filter_contours_area_of_image(image, contours, hierarchy, max_area=1.0, min_area=0.0, dilate=0): found_polygons_early = [] @@ -79,61 +73,37 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area=1. 
found_polygons_early.append(polygon2contour(polygon)) return found_polygons_early -def find_new_features_of_contours(contours_main): - areas_main = np.array([cv2.contourArea(contours_main[j]) - for j in range(len(contours_main))]) - M_main = [cv2.moments(contours_main[j]) - for j in range(len(contours_main))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) - for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) - for j in range(len(M_main))] - try: - x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) - for j in range(len(contours_main))]) - argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) - for j in range(len(contours_main))]) - x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] - for j in range(len(contours_main))]) - y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] - for j in range(len(contours_main))]) - x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) - for j in range(len(contours_main))]) - y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) - for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) - for j in range(len(contours_main))]) - except: - x_min_main = np.array([np.min(contours_main[j][:, 0]) - for j in range(len(contours_main))]) - argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) - for j in range(len(contours_main))]) - x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] - for j in range(len(contours_main))]) - y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] - for j in range(len(contours_main))]) - x_max_main = np.array([np.max(contours_main[j][:, 0]) - for j in range(len(contours_main))]) - y_min_main = np.array([np.min(contours_main[j][:, 1]) - for j in range(len(contours_main))]) - y_max_main = np.array([np.max(contours_main[j][:, 1]) - for j in range(len(contours_main))]) - # dis_x=np.abs(x_max_main-x_min_main) +def find_center_of_contours(contours): + moments = [cv2.moments(contour) for contour in contours] + cx = [feat["m10"] / (feat["m00"] + 1e-32) + for feat in moments] + cy = [feat["m01"] / (feat["m00"] + 1e-32) + for feat in moments] + return cx, cy - return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin +def find_new_features_of_contours(contours): + # areas = np.array([cv2.contourArea(contour) for contour in contours]) + cx, cy = find_center_of_contours(contours) + slice_x = np.index_exp[:, 0, 0] + slice_y = np.index_exp[:, 0, 1] + if any(contour.ndim < 3 for contour in contours): + slice_x = np.index_exp[:, 0] + slice_y = np.index_exp[:, 1] + x_min = np.array([np.min(contour[slice_x]) for contour in contours]) + x_max = np.array([np.max(contour[slice_x]) for contour in contours]) + y_min = np.array([np.min(contour[slice_y]) for contour in contours]) + y_max = np.array([np.max(contour[slice_y]) for contour in contours]) + # dis_x=np.abs(x_max-x_min) + y_corr_x_min = np.array([contour[np.argmin(contour[slice_x])][slice_y[1:]] + for contour in contours]) -def find_features_of_contours(contours_main): - areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] - cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] - x_min_main=np.array([np.min(contours_main[j][:,0,0]) for j in 
range(len(contours_main))]) - x_max_main=np.array([np.max(contours_main[j][:,0,0]) for j in range(len(contours_main))]) + return cx, cy, x_min, x_max, y_min, y_max, y_corr_x_min - y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) - y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) +def find_features_of_contours(contours): + y_min = np.array([np.min(contour[:,0,1]) for contour in contours]) + y_max = np.array([np.max(contour[:,0,1]) for contour in contours]) - return y_min_main, y_max_main + return y_min, y_max def return_parent_contours(contours, hierarchy): contours_parent = [contours[i] @@ -143,14 +113,11 @@ def return_parent_contours(contours, hierarchy): def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): # pixels of images are identified by 5 - if len(region_pre_p.shape) == 3: + if region_pre_p.ndim == 3: cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: cnts_images = (region_pre_p[:, :] == label) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) @@ -159,13 +126,11 @@ def return_contours_of_interested_region(region_pre_p, label, min_area=0.0002): return contours_imgs def do_work_of_contours_in_image(contour, index_r_con, img, slope_first): - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[contour], color=(1, 1, 1)) + img_copy = np.zeros(img.shape[:2], dtype=np.uint8) + img_copy = cv2.fillPoly(img_copy, pts=[contour], color=1) img_copy = rotation_image_new(img_copy, -slope_first) - img_copy = img_copy.astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(img_copy, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) @@ -188,8 +153,8 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): cnts_org = [] # print(cnts,'cnts') for i in range(len(cnts)): - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) + img_copy = np.zeros(img.shape[:2], dtype=np.uint8) + img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=1) # plt.imshow(img_copy) # plt.show() @@ -200,9 +165,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): # plt.imshow(img_copy) # plt.show() - img_copy = img_copy.astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(img_copy, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) @@ -219,12 +182,11 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): interpolation=cv2.INTER_NEAREST) cnts_org = [] for cnt in cnts: - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[(cnt / zoom).astype(int)], color=(1, 1, 1)) + img_copy = np.zeros(img.shape[:2], dtype=np.uint8) + img_copy = cv2.fillPoly(img_copy, pts=[cnt // zoom], color=1) img_copy = rotation_image_new(img_copy, 
-slope_first).astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(img_copy, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) @@ -234,14 +196,13 @@ def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): return cnts_org def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first, confidence_matrix): - img_copy = np.zeros(img.shape) - img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=(1, 1, 1)) - confidence_matrix_mapped_with_contour = confidence_matrix * img_copy[:,:,0] - confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy[:,:,0])) + img_copy = np.zeros(img.shape[:2], dtype=np.uint8) + img_copy = cv2.fillPoly(img_copy, pts=[contour_par], color=1) + confidence_matrix_mapped_with_contour = confidence_matrix * img_copy + confidence_contour = np.sum(confidence_matrix_mapped_with_contour) / float(np.sum(img_copy)) img_copy = rotation_image_new(img_copy, -slope_first).astype(np.uint8) - imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(img_copy, 0, 255, 0) cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) if len(cont_int)==0: @@ -255,7 +216,7 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix): if not len(cnts): - return [], [] + return [] confidence_matrix = cv2.resize(confidence_matrix, (img.shape[1] // 6, img.shape[0] // 6), @@ -265,18 +226,15 @@ def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix): cnt_mask = np.zeros(confidence_matrix.shape) cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0) confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) - return cnts, confs + return confs def return_contours_of_interested_textline(region_pre_p, label): # pixels of images are identified by 5 - if len(region_pre_p.shape) == 3: + if region_pre_p.ndim == 3: cnts_images = (region_pre_p[:, :, 0] == label) * 1 else: cnts_images = (region_pre_p[:, :] == label) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0) contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) @@ -286,54 +244,15 @@ def return_contours_of_interested_textline(region_pre_p, label): def return_contours_of_image(image): if len(image.shape) == 2: - image = np.repeat(image[:, :, np.newaxis], 3, axis=2) image = image.astype(np.uint8) + imgray = image else: image = image.astype(np.uint8) - imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) + imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return contours, hierarchy -def return_contours_of_interested_region_by_min_size(region_pre_p, label, min_size=0.00003): - # pixels of images are identified by 5 - if 
len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == label) * 1 - else: - cnts_images = (region_pre_p[:, :] == label) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables( - thresh, contours_imgs, hierarchy, max_area=1, min_area=min_size) - - return contours_imgs - -def return_contours_of_interested_region_by_size(region_pre_p, label, min_area, max_area): - # pixels of images are identified by 5 - if len(region_pre_p.shape) == 3: - cnts_images = (region_pre_p[:, :, 0] == label) * 1 - else: - cnts_images = (region_pre_p[:, :] == label) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - contours_imgs = return_parent_contours(contours_imgs, hierarchy) - contours_imgs = filter_contours_area_of_image_tables( - thresh, contours_imgs, hierarchy, max_area=max_area, min_area=min_area) - - img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3)) - img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1)) - - return img_ret[:, :, 0] - def dilate_textline_contours(all_found_textline_polygons): return [[polygon2contour(contour2polygon(contour, dilate=6)) for contour in region] @@ -359,6 +278,21 @@ def polygon2contour(polygon: Polygon) -> np.ndarray: polygon = np.array(polygon.exterior.coords[:-1], dtype=int) return np.maximum(0, polygon).astype(np.uint)[:, np.newaxis] +def make_intersection(poly1, poly2): + interp = poly1.intersection(poly2) + # post-process + if interp.is_empty or interp.area == 0.0: + return None + if interp.geom_type == 'GeometryCollection': + # heterogeneous result: filter zero-area shapes (LineString, Point) + interp = unary_union([geom for geom in interp.geoms if geom.area > 0]) + if interp.geom_type == 'MultiPolygon': + # homogeneous result: construct convex hull to connect + interp = join_polygons(interp.geoms) + assert interp.geom_type == 'Polygon', interp.wkt + interp = make_valid(interp) + return interp + def make_valid(polygon: Polygon) -> Polygon: """Ensures shapely.geometry.Polygon object is valid by repeated rearrangement/simplification/enlargement.""" def isint(x): diff --git a/src/eynollah/utils/drop_capitals.py b/src/eynollah/utils/drop_capitals.py index 67547d3..9f82fac 100644 --- a/src/eynollah/utils/drop_capitals.py +++ b/src/eynollah/utils/drop_capitals.py @@ -1,6 +1,7 @@ import numpy as np import cv2 from .contour import ( + find_center_of_contours, find_new_features_of_contours, return_contours_of_image, return_parent_contours, @@ -22,8 +23,8 @@ def adhere_drop_capital_region_into_corresponding_textline( ): # print(np.shape(all_found_textline_polygons),np.shape(all_found_textline_polygons[3]),'all_found_textline_polygonsshape') # print(all_found_textline_polygons[3]) - cx_m, cy_m, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - cx_h, cy_h, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_h) + cx_m, cy_m = 
find_center_of_contours(contours_only_text_parent) + cx_h, cy_h = find_center_of_contours(contours_only_text_parent_h) cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contours(polygons_of_drop_capitals) img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) @@ -89,9 +90,9 @@ def adhere_drop_capital_region_into_corresponding_textline( region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1 # print(region_final,'region_final') - # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -153,9 +154,9 @@ def adhere_drop_capital_region_into_corresponding_textline( # areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))]) - # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -208,7 +209,7 @@ def adhere_drop_capital_region_into_corresponding_textline( try: # print(all_found_textline_polygons[j_cont][0]) - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -261,7 +262,7 @@ def adhere_drop_capital_region_into_corresponding_textline( else: pass - ##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + ##cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) ###print(all_box_coord[j_cont]) ###print(cx_t) ###print(cy_t) @@ -315,9 +316,9 @@ def adhere_drop_capital_region_into_corresponding_textline( region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1 # print(region_final,'region_final') - # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) try: - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -375,12 +376,12 @@ def adhere_drop_capital_region_into_corresponding_textline( # areas_main=np.array([cv2.contourArea(all_found_textline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_textline_polygons[int(region_final)]))]) - # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(cx_t,'print') try: # 
print(all_found_textline_polygons[j_cont][0]) - cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[int(region_final)]) # print(all_box_coord[j_cont]) # print(cx_t) # print(cy_t) @@ -453,7 +454,7 @@ def adhere_drop_capital_region_into_corresponding_textline( #####try: #####if len(contours_new_parent)==1: ######print(all_found_textline_polygons[j_cont][0]) - #####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont]) + #####cx_t, cy_t = find_center_of_contours(all_found_textline_polygons[j_cont]) ######print(all_box_coord[j_cont]) ######print(cx_t) ######print(cy_t) diff --git a/src/eynollah/utils/separate_lines.py b/src/eynollah/utils/separate_lines.py index d41dda1..22ef00d 100644 --- a/src/eynollah/utils/separate_lines.py +++ b/src/eynollah/utils/separate_lines.py @@ -142,13 +142,12 @@ def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): rotation_matrix) def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help): - (h, w) = img_patch.shape[:2] + h, w = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) x_d = M[0, 2] y_d = M[1, 2] - thetha = thetha / 180. * np.pi - rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]]) + rotation_matrix = M[:2, :2] contour_text_interest_copy = contour_text_interest.copy() x_cont = contour_text_interest[:, 0, 0] @@ -1302,19 +1301,16 @@ def separate_lines_new_inside_tiles(img_path, thetha): def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_ind, add_boxes_coor_into_textlines): kernel = np.ones((5, 5), np.uint8) - pixel = 255 + label = 255 min_area = 0 max_area = 1 - if len(img_patch.shape) == 3: - cnts_images = (img_patch[:, :, 0] == pixel) * 1 + if img_patch.ndim == 3: + cnts_images = (img_patch[:, :, 0] == label) * 1 else: - cnts_images = (img_patch[:, :] == pixel) * 1 - cnts_images = cnts_images.astype(np.uint8) - cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2) - imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY) - ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_imgs, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + cnts_images = (img_patch[:, :] == label) * 1 + _, thresh = cv2.threshold(cnts_images.astype(np.uint8), 0, 255, 0) + contours_imgs, hierarchy = cv2.findContours(thresh.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours_imgs = return_parent_contours(contours_imgs, hierarchy) contours_imgs = filter_contours_area_of_image_tables(thresh, @@ -1322,14 +1318,12 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i max_area=max_area, min_area=min_area) cont_final = [] for i in range(len(contours_imgs)): - img_contour = np.zeros((cnts_images.shape[0], cnts_images.shape[1], 3)) - img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=(255, 255, 255)) - img_contour = img_contour.astype(np.uint8) + img_contour = np.zeros(cnts_images.shape[:2], dtype=np.uint8) + img_contour = cv2.fillPoly(img_contour, pts=[contours_imgs[i]], color=255) img_contour = cv2.dilate(img_contour, kernel, iterations=4) - imgrayrot = cv2.cvtColor(img_contour, cv2.COLOR_BGR2GRAY) - _, threshrot = cv2.threshold(imgrayrot, 0, 255, 0) - contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + _, threshrot = 
cv2.threshold(img_contour, 0, 255, 0) + contours_text_rot, _ = cv2.findContours(threshrot.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) ##contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[ ##0] @@ -1344,62 +1338,55 @@ def separate_lines_vertical_cont(img_patch, contour_text_interest, thetha, box_i def textline_contours_postprocessing(textline_mask, slope, contour_text_interest, box_ind, add_boxes_coor_into_textlines=False): - textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 - textline_mask = textline_mask.astype(np.uint8) + textline_mask = textline_mask * 255 kernel = np.ones((5, 5), np.uint8) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel) textline_mask = cv2.erode(textline_mask, kernel, iterations=2) # textline_mask = cv2.erode(textline_mask, kernel, iterations=1) - try: - x_help = 30 - y_help = 2 - textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), - textline_mask.shape[1] + int(2 * x_help), 3)) - textline_mask_help[y_help : y_help + textline_mask.shape[0], - x_help : x_help + textline_mask.shape[1], :] = np.copy(textline_mask[:, :, :]) + x_help = 30 + y_help = 2 - dst = rotate_image(textline_mask_help, slope) - dst = dst[:, :, 0] - dst[dst != 0] = 1 + textline_mask_help = np.zeros((textline_mask.shape[0] + int(2 * y_help), + textline_mask.shape[1] + int(2 * x_help))) + textline_mask_help[y_help : y_help + textline_mask.shape[0], + x_help : x_help + textline_mask.shape[1]] = np.copy(textline_mask[:, :]) - # if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3: - # plt.imshow(dst) - # plt.show() + dst = rotate_image(textline_mask_help, slope) + dst[dst != 0] = 1 - contour_text_copy = contour_text_interest.copy() - contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0] - contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] + # if np.abs(slope)>.5 and textline_mask.shape[0]/float(textline_mask.shape[1])>3: + # plt.imshow(dst) + # plt.show() - img_contour = np.zeros((box_ind[3], box_ind[2], 3)) - img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255)) + contour_text_copy = contour_text_interest.copy() + contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0] + contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] - img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), - img_contour.shape[1] + int(2 * x_help), 3)) - img_contour_help[y_help : y_help + img_contour.shape[0], - x_help : x_help + img_contour.shape[1], :] = np.copy(img_contour[:, :, :]) + img_contour = np.zeros((box_ind[3], box_ind[2])) + img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=255) - img_contour_rot = rotate_image(img_contour_help, slope) + img_contour_help = np.zeros((img_contour.shape[0] + int(2 * y_help), + img_contour.shape[1] + int(2 * x_help))) + img_contour_help[y_help : y_help + img_contour.shape[0], + x_help : x_help + img_contour.shape[1]] = np.copy(img_contour[:, :]) - img_contour_rot = img_contour_rot.astype(np.uint8) - # dst_help = dst_help.astype(np.uint8) - imgrayrot = cv2.cvtColor(img_contour_rot, cv2.COLOR_BGR2GRAY) - _, threshrot = cv2.threshold(imgrayrot, 0, 255, 0) - contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + img_contour_rot = rotate_image(img_contour_help, slope) - len_con_text_rot = [len(contours_text_rot[ib]) for ib in 
range(len(contours_text_rot))] - ind_big_con = np.argmax(len_con_text_rot) + _, threshrot = cv2.threshold(img_contour_rot, 0, 255, 0) + contours_text_rot, _ = cv2.findContours(threshrot.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if abs(slope) > 45: - _, contours_rotated_clean = separate_lines_vertical_cont( - textline_mask, contours_text_rot[ind_big_con], box_ind, slope, - add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) - else: - _, contours_rotated_clean = separate_lines( - dst, contours_text_rot[ind_big_con], slope, x_help, y_help) - except: - contours_rotated_clean = [] + len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))] + ind_big_con = np.argmax(len_con_text_rot) + + if abs(slope) > 45: + _, contours_rotated_clean = separate_lines_vertical_cont( + textline_mask, contours_text_rot[ind_big_con], box_ind, slope, + add_boxes_coor_into_textlines=add_boxes_coor_into_textlines) + else: + _, contours_rotated_clean = separate_lines( + dst, contours_text_rot[ind_big_con], slope, x_help, y_help) return contours_rotated_clean @@ -1605,7 +1592,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map @wrap_ndarray_shared(kw='textline_mask_tot_ea') def do_work_of_slopes_new( - box_text, contour, contour_par, index_r_con, + box_text, contour, contour_par, textline_mask_tot_ea=None, slope_deskew=0.0, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None ): @@ -1660,12 +1647,12 @@ def do_work_of_slopes_new( all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contour_par, box_text) - return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope + return cnt_clean_rot, crop_coor, slope @wrap_ndarray_shared(kw='textline_mask_tot_ea') @wrap_ndarray_shared(kw='mask_texts_only') def do_work_of_slopes_new_curved( - box_text, contour, contour_par, index_r_con, + box_text, contour_par, textline_mask_tot_ea=None, mask_texts_only=None, num_col=1, scale_par=1.0, slope_deskew=0.0, logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None @@ -1756,11 +1743,11 @@ def do_work_of_slopes_new_curved( slope_for_all, contour_par, box_text, True) - return textlines_cnt_per_region[::-1], box_text, contour, contour_par, crop_coor, index_r_con, slope + return textlines_cnt_per_region[::-1], crop_coor, slope @wrap_ndarray_shared(kw='textline_mask_tot_ea') def do_work_of_slopes_new_light( - box_text, contour, contour_par, index_r_con, + box_text, contour, contour_par, textline_mask_tot_ea=None, slope_deskew=0, textline_light=True, logger=None ): @@ -1790,4 +1777,4 @@ def do_work_of_slopes_new_light( all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_deskew, contour_par, box_text) - return cnt_clean_rot, box_text, contour, contour_par, crop_coor, index_r_con, slope_deskew + return cnt_clean_rot, crop_coor, slope_deskew diff --git a/src/eynollah/utils/utils_ocr.py b/src/eynollah/utils/utils_ocr.py index 602ad6e..6e71b0f 100644 --- a/src/eynollah/utils/utils_ocr.py +++ b/src/eynollah/utils/utils_ocr.py @@ -1,13 +1,17 @@ +import math +import copy + import numpy as np import cv2 import tensorflow as tf from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d -import math from PIL import Image, ImageDraw, ImageFont from Bio import pairwise2 + from .resize import resize_image + def decode_batch_predictions(pred, num_to_char, max_len = 128): # 
input_len is the product of the batch size and the # number of time steps. @@ -370,7 +374,9 @@ def return_textline_contour_with_added_box_coordinate(textline_contour, box_ind return textline_contour -def return_rnn_cnn_ocr_of_given_textlines(image, all_found_textline_polygons, +def return_rnn_cnn_ocr_of_given_textlines(image, + all_found_textline_polygons, + all_box_coord, prediction_model, b_s_ocr, num_to_char, textline_light=False, diff --git a/src/eynollah/utils/xml.py b/src/eynollah/utils/xml.py index 13420df..88d1df8 100644 --- a/src/eynollah/utils/xml.py +++ b/src/eynollah/utils/xml.py @@ -57,19 +57,15 @@ def xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_margina og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) region_counter.inc('region') - for idx_textregion, _ in enumerate(order_of_texts): - og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(order_of_texts[idx_textregion] + 1))) + for idx_textregion in order_of_texts: + og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=region_counter.region_id(idx_textregion + 1))) region_counter.inc('region') for id_marginal in id_of_marginalia_right: og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) region_counter.inc('region') -def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): - indexes_sorted = np.array(indexes_sorted) - index_of_types = np.array(index_of_types) - kind_of_texts = np.array(kind_of_texts) - +def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, indexes_sorted, index_of_types, kind_of_texts, ref_point): id_of_texts = [] order_of_texts = [] diff --git a/src/eynollah/writer.py b/src/eynollah/writer.py index 936c95f..8859d95 100644 --- a/src/eynollah/writer.py +++ b/src/eynollah/writer.py @@ -56,113 +56,30 @@ class EynollahXmlWriter(): points_page_print = points_page_print + ' ' return points_page_print[:-1] - def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter, ocr_all_textlines_textregion): - for j in range(len(all_found_textline_polygons_marginals[marginal_idx])): - coords = CoordsType() - textline = TextLineType(id=counter.next_line_id, Coords=coords) - if ocr_all_textlines_textregion: - textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) - marginal_region.add_TextLine(textline) - marginal_region.set_orientation(-slopes_marginals[marginal_idx]) - points_co = '' - for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])): - if not (self.curved_line or self.textline_light): - if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) - textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) - else: - textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) - 
textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) - points_co += str(textline_x_coord) - points_co += ',' - points_co += str(textline_y_coord) - if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45: - if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y)) - else: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y)) - - elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45: - if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) - else: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) - points_co += ' ' - coords.set_points(points_co[:-1]) - def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): self.logger.debug('enter serialize_lines_in_region') - for j in range(len(all_found_textline_polygons[region_idx])): + for j, polygon_textline in enumerate(all_found_textline_polygons[region_idx]): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) if ocr_all_textlines_textregion: - textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) + # FIXME: add OCR confidence + textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])]) text_region.add_TextLine(textline) text_region.set_orientation(-slopes[region_idx]) region_bboxes = all_box_coord[region_idx] points_co = '' - for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): - if not (self.curved_line or self.textline_light): - if len(contour_textline) == 2: - textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) - textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) - else: - textline_x_coord = max(0, int((contour_textline[0][0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) - textline_y_coord = max(0, int((contour_textline[0][1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) - points_co += str(textline_x_coord) - points_co += ',' - points_co += str(textline_y_coord) - - if self.textline_light or (self.curved_line and np.abs(slopes[region_idx]) <= 45): - if len(contour_textline) == 
2: - points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y)) - else: - points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) - elif self.curved_line and np.abs(slopes[region_idx]) > 45: - if len(contour_textline)==2: - points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[1] + region_bboxes[0] + page_coord[0])/self.scale_y)) - else: - points_co += str(int((contour_textline[0][0] + region_bboxes[2]+page_coord[2])/self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y)) - points_co += ' ' - coords.set_points(points_co[:-1]) - - def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): - self.logger.debug('enter serialize_lines_in_region') - for j in range(1): - coords = CoordsType() - textline = TextLineType(id=counter.next_line_id, Coords=coords) - if ocr_all_textlines_textregion: - textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) - text_region.add_TextLine(textline) - #region_bboxes = all_box_coord[region_idx] - points_co = '' - for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[j]): - if len(contour_textline) == 2: - points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y)) - else: - points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) - - points_co += ' ' + for point in polygon_textline: + if len(point) != 2: + point = point[0] + point_x = point[0] + page_coord[2] + point_y = point[1] + page_coord[0] + # FIXME: or actually... not self.textline_light and not self.curved_line or np.abs(slopes[region_idx]) > 45? 
+ if not self.textline_light and not (self.curved_line and np.abs(slopes[region_idx]) <= 45): + point_x += region_bboxes[2] + point_y += region_bboxes[0] + point_x = max(0, int(point_x / self.scale_x)) + point_y = max(0, int(point_y / self.scale_y)) + points_co += str(point_x) + ',' + str(point_y) + ' ' coords.set_points(points_co[:-1]) def write_pagexml(self, pcgts): @@ -170,8 +87,50 @@ class EynollahXmlWriter(): with open(self.output_filename, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals_left, found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, conf_contours_textregion=None, skip_layout_reading_order=False): - self.logger.debug('enter build_pagexml_no_full_layout') + def build_pagexml_no_full_layout( + self, found_polygons_text_region, + page_coord, order_of_texts, id_of_texts, + all_found_textline_polygons, + all_box_coord, + found_polygons_text_region_img, + found_polygons_marginals_left, found_polygons_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, + found_polygons_tables, + **kwargs): + return self.build_pagexml_full_layout( + found_polygons_text_region, [], + page_coord, order_of_texts, id_of_texts, + all_found_textline_polygons, [], + all_box_coord, [], + found_polygons_text_region_img, found_polygons_tables, [], + found_polygons_marginals_left, found_polygons_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, [], slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, + **kwargs) + + def build_pagexml_full_layout( + self, + found_polygons_text_region, found_polygons_text_region_h, + page_coord, order_of_texts, id_of_texts, + all_found_textline_polygons, all_found_textline_polygons_h, + all_box_coord, all_box_coord_h, + found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, + found_polygons_marginals_left,found_polygons_marginals_right, + all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, + all_box_coord_marginals_left, all_box_coord_marginals_right, + slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, + cont_page, polygons_seplines, + ocr_all_textlines=None, ocr_all_textlines_h=None, + ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, + ocr_all_textlines_drop=None, + conf_contours_textregions=None, conf_contours_textregions_h=None, + skip_layout_reading_order=False): + self.logger.debug('enter build_pagexml') # create the file structure pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org) @@ -179,191 +138,116 @@ class EynollahXmlWriter(): 
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page)))) counter = EynollahIdCounter() - if len(found_polygons_text_region) > 0: + if len(order_of_texts): _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left] - id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] + id_of_marginalia_left = [_counter_marginals.next_region_id + for _ in found_polygons_marginals_left] + id_of_marginalia_right = [_counter_marginals.next_region_id + for _ in found_polygons_marginals_right] xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) - for mm in range(len(found_polygons_text_region)): - textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]), - ) - #textregion.set_conf(conf_contours_textregion[mm]) + for mm, region_contour in enumerate(found_polygons_text_region): + textregion = TextRegionType( + id=counter.next_region_id, type_='paragraph', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord, + skip_layout_reading_order)) + ) + if conf_contours_textregions: + textregion.Coords.set_conf(conf_contours_textregions[mm]) page.add_TextRegion(textregion) if ocr_all_textlines: ocr_textlines = ocr_all_textlines[mm] else: ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) - - for mm in range(len(found_polygons_marginals_left)): - marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) - page.add_TextRegion(marginal) - if ocr_all_textlines_marginals_left: - ocr_textlines = ocr_all_textlines_marginals_left[mm] - else: - ocr_textlines = None - - #print(ocr_textlines, mm, len(all_found_textline_polygons_marginals_left[mm]) ) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) - - for mm in range(len(found_polygons_marginals_right)): - marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord))) - page.add_TextRegion(marginal) - if ocr_all_textlines_marginals_right: - ocr_textlines = ocr_all_textlines_marginals_right[mm] - else: - ocr_textlines = None - - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) - - for mm in range(len(found_polygons_text_region_img)): - img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType()) - page.add_ImageRegion(img_region) - points_co = '' - for lmm in range(len(found_polygons_text_region_img[mm])): - try: - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) - points_co += ' ' - except: - - points_co += str(int((found_polygons_text_region_img[mm][lmm][0] + 
page_coord[2])/ self.scale_x )) - points_co += ',' - points_co += str(int((found_polygons_text_region_img[mm][lmm][1] + page_coord[0])/ self.scale_y )) - points_co += ' ' - - img_region.get_Coords().set_points(points_co[:-1]) - - for mm in range(len(polygons_lines_to_be_written_in_xml)): - sep_hor = SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType()) - page.add_SeparatorRegion(sep_hor) - points_co = '' - for lmm in range(len(polygons_lines_to_be_written_in_xml[mm])): - points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,0] ) / self.scale_x)) - points_co += ',' - points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,1] ) / self.scale_y)) - points_co += ' ' - sep_hor.get_Coords().set_points(points_co[:-1]) - for mm in range(len(found_polygons_tables)): - tab_region = TableRegionType(id=counter.next_region_id, Coords=CoordsType()) - page.add_TableRegion(tab_region) - points_co = '' - for lmm in range(len(found_polygons_tables[mm])): - points_co += str(int((found_polygons_tables[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) - points_co += ',' - points_co += str(int((found_polygons_tables[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) - points_co += ' ' - tab_region.get_Coords().set_points(points_co[:-1]) - - return pcgts - - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals_left,found_polygons_marginals_right, all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines=None, ocr_all_textlines_h=None, ocr_all_textlines_marginals_left=None, ocr_all_textlines_marginals_right=None, ocr_all_textlines_drop=None, conf_contours_textregion=None, conf_contours_textregion_h=None): - self.logger.debug('enter build_pagexml_full_layout') - - # create the file structure - pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org) - page = pcgts.get_Page() - page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page)))) - - counter = EynollahIdCounter() - _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) - id_of_marginalia_left = [_counter_marginals.next_region_id for _ in found_polygons_marginals_left] - id_of_marginalia_right = [_counter_marginals.next_region_id for _ in found_polygons_marginals_right] - xml_reading_order(page, order_of_texts, id_of_marginalia_left, id_of_marginalia_right) - - for mm in range(len(found_polygons_text_region)): - textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm])) - page.add_TextRegion(textregion) - - if ocr_all_textlines: - ocr_textlines = ocr_all_textlines[mm] - else: - ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, + all_box_coord, slopes, counter, ocr_textlines) 
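Both the rewritten serialize_lines_in_region above and calculate_polygon_coords further below collapse the old per-case branches into one per-point normalization: unwrap OpenCV's (N, 1, 2) point nesting, add the page (and, for text lines, region box) offset, divide by the output scale, and join the results into a PAGE points string. A rough sketch of that shared pattern with made-up offsets and scales (the helper name below is illustrative, not part of the module):

    import numpy as np

    def points_string(contour, x_offset, y_offset, scale_x, scale_y):
        # normalize one contour to a PAGE-XML "x1,y1 x2,y2 ..." string (illustrative only)
        coords = []
        for point in contour:
            if len(point) != 2:      # OpenCV contours have shape (N, 1, 2)
                point = point[0]
            x = max(0, int((point[0] + x_offset) / scale_x))
            y = max(0, int((point[1] + y_offset) / scale_y))
            coords.append(f"{x},{y}")
        return ' '.join(coords)

    contour = np.array([[[10, 20]], [[30, 40]]])      # toy contour
    print(points_string(contour, 7, 5, 2.0, 2.0))     # 8,12 18,22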
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) - for mm in range(len(found_polygons_text_region_h)): - textregion = TextRegionType(id=counter.next_region_id, type_='heading', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) + for mm, region_contour in enumerate(found_polygons_text_region_h): + textregion = TextRegionType( + id=counter.next_region_id, type_='heading', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) + ) + if conf_contours_textregions_h: + textregion.Coords.set_conf(conf_contours_textregions_h[mm]) page.add_TextRegion(textregion) - if ocr_all_textlines_h: ocr_textlines = ocr_all_textlines_h[mm] else: ocr_textlines = None - self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) + self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, + all_box_coord_h, slopes_h, counter, ocr_textlines) - for mm in range(len(found_polygons_marginals_left)): - marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_left[mm], page_coord))) + for mm, region_contour in enumerate(found_polygons_marginals_left): + marginal = TextRegionType( + id=counter.next_region_id, type_='marginalia', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) + ) page.add_TextRegion(marginal) if ocr_all_textlines_marginals_left: ocr_textlines = ocr_all_textlines_marginals_left[mm] else: ocr_textlines = None - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) - - for mm in range(len(found_polygons_marginals_right)): - marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals_right[mm], page_coord))) + self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines) + + for mm, region_contour in enumerate(found_polygons_marginals_right): + marginal = TextRegionType( + id=counter.next_region_id, type_='marginalia', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) + ) page.add_TextRegion(marginal) if ocr_all_textlines_marginals_right: ocr_textlines = ocr_all_textlines_marginals_right[mm] else: ocr_textlines = None - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) - - for mm in range(len(found_polygons_drop_capitals)): - dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))) + self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_right, mm, page_coord, + all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines) + + for mm, region_contour in enumerate(found_polygons_drop_capitals): + dropcapital = TextRegionType( + id=counter.next_region_id, type_='drop-capital', + Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord)) + ) 
             page.add_TextRegion(dropcapital)
-            all_box_coord_drop = None
-            slopes_drop = None
+            all_box_coord_drop = [[0, 0, 0, 0]]
+            slopes_drop = [0]
             if ocr_all_textlines_drop:
                 ocr_textlines = ocr_all_textlines_drop[mm]
             else:
                 ocr_textlines = None
-            self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=ocr_textlines)
+            self.serialize_lines_in_region(dropcapital, [[found_polygons_drop_capitals[mm]]], 0, page_coord,
+                                           all_box_coord_drop, slopes_drop, counter, ocr_textlines)
 
-        for mm in range(len(found_polygons_text_region_img)):
-            page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))
+        for region_contour in found_polygons_text_region_img:
+            page.add_ImageRegion(
+                ImageRegionType(id=counter.next_region_id,
+                                Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))))
 
-        for mm in range(len(polygons_lines_to_be_written_in_xml)):
-            page.add_SeparatorRegion(SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0]))))
+        for region_contour in polygons_seplines:
+            page.add_SeparatorRegion(
+                SeparatorRegionType(id=counter.next_region_id,
+                                    Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, [0, 0, 0, 0]))))
 
-        for mm in range(len(found_polygons_tables)):
-            page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord))))
+        for region_contour in found_polygons_tables:
+            page.add_TableRegion(
+                TableRegionType(id=counter.next_region_id,
+                                Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))))
 
         return pcgts
 
     def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
         self.logger.debug('enter calculate_polygon_coords')
         coords = ''
-        for value_bbox in contour:
-            if skip_layout_reading_order:
-                if len(value_bbox) == 2:
-                    coords += str(int((value_bbox[0]) / self.scale_x))
-                    coords += ','
-                    coords += str(int((value_bbox[1]) / self.scale_y))
-                else:
-                    coords += str(int((value_bbox[0][0]) / self.scale_x))
-                    coords += ','
-                    coords += str(int((value_bbox[0][1]) / self.scale_y))
-            else:
-                if len(value_bbox) == 2:
-                    coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
-                    coords += ','
-                    coords += str(int((value_bbox[1] + page_coord[0]) / self.scale_y))
-                else:
-                    coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x))
-                    coords += ','
-                    coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y))
-            coords=coords + ' '
+        for point in contour:
+            if len(point) != 2:
+                point = point[0]
+            point_x = point[0]
+            point_y = point[1]
+            if not skip_layout_reading_order:
+                point_x += page_coord[2]
+                point_y += page_coord[0]
+            point_x = int(point_x / self.scale_x)
+            point_y = int(point_y / self.scale_y)
+            coords += str(point_x) + ',' + str(point_y) + ' '
         return coords[:-1]
diff --git a/tests/test_run.py b/tests/test_run.py
index 59e5099..79c64c2 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -17,21 +17,26 @@ from ocrd_models.constants import NAMESPACES as NS
 
 testdir = Path(__file__).parent.resolve()
 
 MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve()))
-MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_0').resolve()))
+MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_1').resolve()))
 MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))
 
 @pytest.mark.parametrize(
     "options",
     [
         [], # defaults
-        ["--allow_scaling", "--curved-line"],
+        #["--allow_scaling", "--curved-line"],
         ["--allow_scaling", "--curved-line", "--full-layout"],
         ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based"],
         ["--allow_scaling", "--curved-line", "--full-layout", "--reading_order_machine_based", "--textline_light", "--light_version"],
         # -ep ...
         # -eoi ...
-        # --do_ocr
+        # FIXME: find out whether OCR extra was installed, otherwise skip these
+        ["--do_ocr"],
+        ["--do_ocr", "--light_version", "--textline_light"],
+        ["--do_ocr", "--transformer_ocr"],
+        #["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light"],
+        ["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light", "--full-layout"],
         # --skip_layout_and_reading_order
     ], ids=str)
 def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
@@ -62,6 +67,44 @@ def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
     lines = tree.xpath("//page:TextLine", namespaces=NS)
     assert len(lines) == 31, "result is inaccurate" # 29 paragraph lines, 1 page and 1 catch-word line
 
+@pytest.mark.parametrize(
+    "options",
+    [
+        ["--tables"],
+        ["--tables", "--full-layout"],
+        ["--tables", "--full-layout", "--textline_light", "--light_version"],
+    ], ids=str)
+def test_run_eynollah_layout_filename2(tmp_path, pytestconfig, caplog, options):
+    infile = testdir.joinpath('resources/euler_rechenkunst01_1738_0025.tif')
+    outfile = tmp_path / 'euler_rechenkunst01_1738_0025.xml'
+    args = [
+        '-m', MODELS_LAYOUT,
+        '-i', str(infile),
+        '-o', str(outfile.parent),
+    ]
+    if pytestconfig.getoption('verbose') > 0:
+        args.extend(['-l', 'DEBUG'])
+    caplog.set_level(logging.INFO)
+    def only_eynollah(logrec):
+        return logrec.name == 'eynollah'
+    runner = CliRunner()
+    with caplog.filtering(only_eynollah):
+        result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
+    assert result.exit_code == 0, result.stdout
+    logmsgs = [logrec.message for logrec in caplog.records]
+    assert str(infile) in logmsgs
+    assert outfile.exists()
+    tree = page_from_file(str(outfile)).etree
+    regions = tree.xpath("//page:TextRegion", namespaces=NS)
+    assert len(regions) >= 2, "result is inaccurate"
+    regions = tree.xpath("//page:TableRegion", namespaces=NS)
+    # model/decoding is not very precise, so (depending on mode) we can get fractures/splits/FP
+    assert len(regions) >= 1, "result is inaccurate"
+    regions = tree.xpath("//page:SeparatorRegion", namespaces=NS)
+    assert len(regions) >= 2, "result is inaccurate"
+    lines = tree.xpath("//page:TextLine", namespaces=NS)
+    assert len(lines) >= 2, "result is inaccurate" # mostly table (if detected correctly), but 1 page and 1 catch-word line
+
 def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
     indir = testdir.joinpath('resources')
     outdir = tmp_path
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 252213f..e2b323a 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -2,6 +2,5 @@ def test_utils_import():
     import eynollah.utils
     import eynollah.utils.contour
     import eynollah.utils.drop_capitals
-    import eynollah.utils.drop_capitals
     import eynollah.utils.is_nan
     import eynollah.utils.rotate