From b85a9dc2561f16b1a8908332f8536eabb7e37564 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 10 Oct 2019 16:13:07 +0200 Subject: [PATCH 01/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdocker:?= =?UTF-8?q?=20Rename=20to=20sbb=5Ftextline=5Fdetector?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitkeep | 0 Dockerfile | 8 + README.md | 37 ++ main.py | 1333 ++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 10 + setup.py | 34 ++ 6 files changed, 1422 insertions(+) create mode 100644 .gitkeep create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 setup.py diff --git a/.gitkeep b/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..020db6f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3 + +ADD main.py / +ADD requirements.txt / + +RUN pip install --proxy=http-proxy.sbb.spk-berlin.de:3128 -r requirements.txt + +ENTRYPOINT ["python", "./main.py"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..a0180f1 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# Textline-Recognition + +*** + +# Installation: + +Setup virtual environment: +``` +virtualenv --python=python3.6 venv +``` + +Activate virtual environment: +``` +source venv/bin/activate +``` + +Upgrade pip: +``` +pip install -U pip +``` + +Install package together with its dependencies in development mode: +``` +pip install -e ./ +``` + +*** + +Perform document structure and textline analysis on a +scanned document image and save the result as PAGE XML. + +### Usage +``` +text_line_recognition --help +``` + + diff --git a/main.py b/main.py new file mode 100644 index 0000000..1701382 --- /dev/null +++ b/main.py @@ -0,0 +1,1333 @@ +#! 
/usr/bin/env python3 + +__version__ = '1.0' + +import os +import sys +import cv2 +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +from sys import getsizeof +import random +from tqdm import tqdm +from keras.models import model_from_json +from keras.models import load_model +import math +from shapely import geometry +from sklearn.cluster import KMeans +import gc +from keras import backend as K +import tensorflow as tf +from scipy.signal import find_peaks +from scipy.ndimage import gaussian_filter1d +import xml.etree.ElementTree as ET +import warnings +import argparse + +with warnings.catch_warnings(): + warnings.simplefilter("ignore") + +__doc__ = \ + """ + tool to extract table form data from alto xml data + """ + + +class textlineerkenner: + def __init__(self, image_dir, dir_out, dir_models): + self.image_dir = image_dir + self.dir_out = dir_out + self.dir_models = dir_models + try: + self.f_name = image_dir.split('/')[len(image_dir.split('/')) - 1] + self.f_name = self.f_name.split('.')[0] + print(self.f_name) + except: + self.f_name = self.f_name.split('.')[0] + self.kernel = np.ones((5, 5), np.uint8) + self.model_page_dir = dir_models + '/model_page.h5' + self.model_region_dir = dir_models + '/model_strukturerkennung.h5' + self.model_textline_dir = dir_models + '/model_textline.h5' + + def find_polugons_size_filter(self, contours, median_area, scaler_up=1.2, scaler_down=0.8): + found_polygons_early = list() + + for c in contours: + if len(c) < 3: # A polygon cannot have less than 3 points + continue + + polygon = geometry.Polygon([point[0] for point in c]) + area = polygon.area + # Check that polygon has area greater than minimal area + if area >= median_area * scaler_down and area <= median_area * scaler_up: + found_polygons_early.append( + np.array([point for point in polygon.exterior.coords], dtype=np.uint)) + return found_polygons_early + + def filter_contours_area_of_image(self, image, contours, hirarchy, max_area, min_area): + found_polygons_early = list() + + jv = 0 + for c in contours: + if len(c) < 3: # A polygon cannot have less than 3 points + continue + + polygon = geometry.Polygon([point[0] for point in c]) + area = polygon.area + if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod( + image.shape[:2]): # and hirarchy[0][jv][3]==-1 : + found_polygons_early.append( + np.array([point for point in polygon.exterior.coords], dtype=np.uint)) + jv += 1 + return found_polygons_early + + def filter_contours_area_of_image_interiors(self, image, contours, hirarchy, max_area, min_area): + found_polygons_early = list() + + jv = 0 + for c in contours: + if len(c) < 3: # A polygon cannot have less than 3 points + continue + + polygon = geometry.Polygon([point[0] for point in c]) + area = polygon.area + if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and \ + hirarchy[0][jv][3] != -1: + # print(c[0][0][1]) + found_polygons_early.append( + np.array([point for point in polygon.exterior.coords], dtype=np.uint)) + jv += 1 + return found_polygons_early + + def resize_image(self, img_in, input_height, input_width): + return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + + def resize_ann(self, seg_in, input_height, input_width): + return cv2.resize(seg_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + + def get_one_hot(self, seg, input_height, input_width, n_classes): + seg = seg[:, :, 0] + seg_f = np.zeros((input_height, input_width, n_classes)) 
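The fill loop that follows builds the one-hot tensor channel by channel; indexing an identity matrix with the label ids gives the same result in one step. A vectorized sketch (not part of the patch):

```python
import numpy as np

def get_one_hot_vectorized(seg, n_classes):
    # seg is an (H, W) integer label mask; the result is (H, W, n_classes).
    # np.eye(n_classes)[seg] produces the same tensor as the
    # channel-by-channel loop in get_one_hot().
    return np.eye(n_classes, dtype=float)[seg]

# e.g. a 2x2 mask with 3 classes:
seg = np.array([[0, 1], [2, 1]])
assert get_one_hot_vectorized(seg, 3).shape == (2, 2, 3)
```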
+ for j in range(n_classes): + seg_f[:, :, j] = (seg == j).astype(int) + return seg_f + + def jaccard_distance_loss(self, y_true, y_pred, smooth=100): + """ + Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) + = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|)) + + The jaccard distance loss is usefull for unbalanced datasets. This has been + shifted so it converges on 0 and is smoothed to avoid exploding or disapearing + gradient. + + Ref: https://en.wikipedia.org/wiki/Jaccard_index + + @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96 + @author: wassname + """ + intersection = K.sum(K.abs(y_true * y_pred), axis=-1) + sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1) + jac = (intersection + smooth) / (sum_ - intersection + smooth) + return (1 - jac) * smooth + + def soft_dice_loss(self, y_true, y_pred, epsilon=1e-6): + ''' + Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions. + Assumes the `channels_last` format. + + # Arguments + y_true: b x X x Y( x Z...) x c One hot encoding of ground truth + y_pred: b x X x Y( x Z...) x c Network output, must sum to 1 over c channel (such as after softmax) + epsilon: Used for numerical stability to avoid divide by zero errors + + # References + V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation + https://arxiv.org/abs/1606.04797 + More details on Dice loss formulation + https://mediatum.ub.tum.de/doc/1395260/1395260.pdf (page 72) + + Adapted from https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022 + ''' + + # skip the batch and class axis for calculating Dice score + axes = tuple(range(1, len(y_pred.shape) - 1)) + + numerator = 2. * K.sum(y_pred * y_true, axes) + + denominator = K.sum(K.square(y_pred) + K.square(y_true), axes) + return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch + + def weighted_categorical_crossentropy(self, weights=None): + """ weighted_categorical_crossentropy + + Args: + * weights: crossentropy weights + Returns: + * weighted categorical crossentropy function + """ + + def loss(y_true, y_pred): + labels_floats = tf.cast(y_true, tf.float32) + per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats, logits=y_pred) + + if weights is not None: + weight_mask = tf.maximum(tf.reduce_max(tf.constant( + np.array(weights, dtype=np.float32)[None, None, None]) + * labels_floats, axis=-1), 1.0) + per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] + return tf.reduce_mean(per_pixel_loss) + + return loss + + def seg_metrics(self, y_true, y_pred, metric_name, metric_type='standard', drop_last=True, mean_per_class=False, + verbose=False): + flag_soft = (metric_type == 'soft') + flag_naive_mean = (metric_type == 'naive') + + # always assume one or more classes + num_classes = K.shape(y_true)[-1] + + if not flag_soft: + # get one-hot encoded masks from y_pred (true masks should already be one-hot) + y_pred = K.one_hot(K.argmax(y_pred), num_classes) + y_true = K.one_hot(K.argmax(y_true), num_classes) + + # if already one-hot, could have skipped above command + # keras uses float32 instead of float64, would give error down (but numpy arrays or keras.to_categorical gives float64) + y_true = K.cast(y_true, 'float32') + y_pred = K.cast(y_pred, 'float32') + + # intersection and union shapes are batch_size * n_classes (values = area in pixels) + axes = (1, 2) # W,H axes of each image + intersection = K.sum(K.abs(y_true * y_pred), axis=axes) + mask_sum = K.sum(K.abs(y_true), 
axis=axes) + K.sum(K.abs(y_pred), axis=axes) + union = mask_sum - intersection # or, np.logical_or(y_pred, y_true) for one-hot + + smooth = .001 + iou = (intersection + smooth) / (union + smooth) + dice = 2 * (intersection + smooth) / (mask_sum + smooth) + + metric = {'iou': iou, 'dice': dice}[metric_name] + + # define mask to be 0 when no pixels are present in either y_true or y_pred, 1 otherwise + mask = K.cast(K.not_equal(union, 0), 'float32') + + if drop_last: + metric = metric[:, :-1] + mask = mask[:, :-1] + + if verbose: + print('intersection, union') + print(K.eval(intersection), K.eval(union)) + print(K.eval(intersection / union)) + + # return mean metrics: remaining axes are (batch, classes) + if flag_naive_mean: + return K.mean(metric) + + # take mean only over non-absent classes + class_count = K.sum(mask, axis=0) + non_zero = tf.greater(class_count, 0) + non_zero_sum = tf.boolean_mask(K.sum(metric * mask, axis=0), non_zero) + non_zero_count = tf.boolean_mask(class_count, non_zero) + + if verbose: + print('Counts of inputs with class present, metrics for non-absent classes') + print(K.eval(class_count), K.eval(non_zero_sum / non_zero_count)) + + return K.mean(non_zero_sum / non_zero_count) + + def mean_iou(self, y_true, y_pred, **kwargs): + return self.seg_metrics(y_true, y_pred, metric_name='iou', **kwargs) + + def Mean_IOU(self, y_true, y_pred): + nb_classes = K.int_shape(y_pred)[-1] + iou = [] + true_pixels = K.argmax(y_true, axis=-1) + pred_pixels = K.argmax(y_pred, axis=-1) + void_labels = K.equal(K.sum(y_true, axis=-1), 0) + for i in range(0, nb_classes): # exclude first label (background) and last label (void) + true_labels = K.equal(true_pixels, i) # & ~void_labels + pred_labels = K.equal(pred_pixels, i) # & ~void_labels + inter = tf.to_int32(true_labels & pred_labels) + union = tf.to_int32(true_labels | pred_labels) + legal_batches = K.sum(tf.to_int32(true_labels), axis=1) > 0 + ious = K.sum(inter, axis=1) / K.sum(union, axis=1) + iou.append( + K.mean(tf.gather(ious, indices=tf.where(legal_batches)))) # returns average IoU of the same objects + iou = tf.stack(iou) + legal_labels = ~tf.debugging.is_nan(iou) + iou = tf.gather(iou, indices=tf.where(legal_labels)) + return K.mean(iou) + + def IoU(self, Yi, y_predi): + ## mean Intersection over Union + ## Mean IoU = TP/(FN + TP + FP) + + IoUs = [] + Nclass = np.unique(Yi) + for c in Nclass: + TP = np.sum((Yi == c) & (y_predi == c)) + FP = np.sum((Yi != c) & (y_predi == c)) + FN = np.sum((Yi == c) & (y_predi != c)) + IoU = TP / float(TP + FP + FN) + print("class {:02.0f}: #TP={:6.0f}, #FP={:6.0f}, #FN={:5.0f}, IoU={:4.3f}".format(c, TP, FP, FN, IoU)) + IoUs.append(IoU) + mIoU = np.mean(IoUs) + print("_________________") + print("Mean IoU: {:4.3f}".format(mIoU)) + return mIoU + + def IoU_case(self, Yi, y_predi, n_classes): + ## mean Intersection over Union + ## Mean IoU = TP/(FN + TP + FP) + + IoUs = [] + + Nclass = n_classes + for c in range(Nclass): + TP = np.sum((Yi == c) & (y_predi == c)) + FP = np.sum((Yi != c) & (y_predi == c)) + FN = np.sum((Yi == c) & (y_predi != c)) + IoUs.append(np.array([TP, FP, FN])) + return IoUs + + def color_images(self, seg, n_classes): + ann_u = range(n_classes) + if len(np.shape(seg)) == 3: + seg = seg[:, :, 0] + + seg_img = np.zeros((np.shape(seg)[0], np.shape(seg)[1], 3)).astype(np.uint8) + colors = sns.color_palette("hls", n_classes) + + for c in ann_u: + c = int(c) + segl = (seg == c) + seg_img[:, :, 0] = segl * c + seg_img[:, :, 1] = segl * c + seg_img[:, :, 2] = segl * c + return 
seg_img + + def color_images_diva(self, seg, n_classes): + ann_u = range(n_classes) + if len(np.shape(seg)) == 3: + seg = seg[:, :, 0] + + seg_img = np.zeros((np.shape(seg)[0], np.shape(seg)[1], 3)).astype(float) + # colors=sns.color_palette("hls", n_classes) + colors = [[1, 0, 0], [8, 0, 0], [2, 0, 0], [4, 0, 0]] + + for c in ann_u: + c = int(c) + segl = (seg == c) + seg_img[:, :, 0][seg == c] = colors[c][0] # segl*(colors[c][0]) + seg_img[:, :, 1][seg == c] = colors[c][1] # seg_img[:,:,1]=segl*(colors[c][1]) + seg_img[:, :, 2][seg == c] = colors[c][2] # seg_img[:,:,2]=segl*(colors[c][2]) + return seg_img + + def rotate_image(self, img_patch, slope): + (h, w) = img_patch.shape[:2] + center = (w // 2, h // 2) + M = cv2.getRotationMatrix2D(center, slope, 1.0) + return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) + + def cleaning_probs(self, probs: np.ndarray, sigma: float) -> np.ndarray: + # Smooth + if sigma > 0.: + return cv2.GaussianBlur(probs, (int(3 * sigma) * 2 + 1, int(3 * sigma) * 2 + 1), sigma) + elif sigma == 0.: + return cv2.fastNlMeansDenoising((probs * 255).astype(np.uint8), h=20) / 255 + else: # Negative sigma, do not do anything + return probs + + def crop_image_inside_box(self, box, img_org_copy): + image_box = img_org_copy[box[1]:box[1] + box[3], box[0]:box[0] + box[2]] + return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] + + def otsu_copy(self, img): + img_r = np.zeros(img.shape) + img1 = img[:, :, 0] + img2 = img[:, :, 1] + img3 = img[:, :, 2] + # print(img.min()) + # print(img[:,:,0].min()) + # blur = cv2.GaussianBlur(img,(5,5)) + # ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) + retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + retval2, threshold2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + retval3, threshold3 = cv2.threshold(img3, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + + img_r[:, :, 0] = threshold1 + img_r[:, :, 1] = threshold1 + img_r[:, :, 2] = threshold1 + return img_r + + def get_image_and_scales(self): + self.image = cv2.imread(self.image_dir) + self.height_org = self.image.shape[0] + self.width_org = self.image.shape[1] + + if self.image.shape[0] < 1000: + self.img_hight_int = 1800 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + elif self.image.shape[0] < 2000 and self.image.shape[0] >= 1000: + self.img_hight_int = 3500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + elif self.image.shape[0] < 3000 and self.image.shape[0] >= 2000: + self.img_hight_int = 4000 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3000: + self.img_hight_int = 4500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + else: + self.img_hight_int = self.image.shape[0] + self.img_width_int = self.image.shape[1] + + self.scale_y = self.img_hight_int / float(self.image.shape[0]) + self.scale_x = self.img_width_int / float(self.image.shape[1]) + + self.image = self.resize_image(self.image, self.img_hight_int, self.img_width_int) + + def start_new_session_and_model(self, model_dir): + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + + session = tf.InteractiveSession() + model = load_model(model_dir, custom_objects={'mean_iou': self.mean_iou, + 
'soft_dice_loss': self.soft_dice_loss, + 'jaccard_distance_loss': self.jaccard_distance_loss, + 'Mean_IOU': self.Mean_IOU}) + + return model, session + + def extract_page(self): + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + + img_height_page = model_page.layers[len(model_page.layers) - 1].output_shape[1] + img_width_page = model_page.layers[len(model_page.layers) - 1].output_shape[2] + n_classes_page = model_page.layers[len(model_page.layers) - 1].output_shape[3] + + img_org_copy = self.image.copy() + + img = self.otsu_copy(self.image) + + for ii in range(60): + img = cv2.GaussianBlur(img, (15, 15), 0) + + # img=self.image.astype(np.uint8) + # img = cv2.medianBlur(img,5) + + img = img / 255.0 + img = self.resize_image(img, img_height_page, img_width_page) + + label_p_pred = model_page.predict( + img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color = self.color_images(seg, n_classes_page) + + imgs = seg_color # /np.max(seg_color)*255#np.repeat(seg_color[:, :, np.newaxis], 3, axis=2) + + imgs = self.resize_image(imgs, img_org_copy.shape[0], img_org_copy.shape[1]) + + # plt.imshow(imgs*255) + # plt.show() + + imgs = imgs.astype(np.uint8) + imgray = cv2.cvtColor(imgs, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + thresh = cv2.dilate(thresh, self.kernel, iterations=30) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + + cnt = contours[np.argmax(cnt_size)] + + x, y, w, h = cv2.boundingRect(cnt) + + box = [x, y, w, h] + + croped_page, page_coord = self.crop_image_inside_box(box, img_org_copy) + + session_page.close() + del model_page + del session_page + + gc.collect() + return croped_page, page_coord + + def extract_text_regions(self, img): + model_region, session_region = self.start_new_session_and_model(self.model_region_dir) + + img_height_region = model_region.layers[len(model_region.layers) - 1].output_shape[1] + img_width_region = model_region.layers[len(model_region.layers) - 1].output_shape[2] + n_classes = model_region.layers[len(model_region.layers) - 1].output_shape[3] + margin = True + if margin: + + width = img_width_region + height = img_height_region + + # offset=int(.1*width) + offset = int(0.03 * width) + + width_mid = width - 2 * offset + height_mid = height - 2 * offset + + img = self.otsu_copy(img) + img = img.astype(np.uint8) + ###img = cv2.medianBlur(img,5) + + # img = cv2.medianBlur(img,5) + + # img=cv2.bilateralFilter(img,9,75,75) + # img=cv2.bilateralFilter(img,9,75,75) + + img = img / 255.0 + + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + if nyf > int(nyf): + nyf = int(nyf) + 1 + else: + nyf = int(nyf) + + for i in range(nxf): + for j in range(nyf): + + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + width # (i+1)*width + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + width # (i+1)*width + + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + height # (j+1)*height + elif j > 0: + index_y_d = j * height_mid + index_y_u = index_y_d + height # (j+1)*height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - width + if index_y_u > img_h: + 
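The surrounding loop slides fixed `width` x `height` windows across the image with an overlap of `2*offset` and, as here, clamps the last row and column of windows to the image border. A standalone sketch of the same index arithmetic (hypothetical helper, assuming the window never exceeds the image size):

```python
def tile_spans(img_len, window, offset):
    """Yield (start, end) pairs covering img_len with fixed-size windows
    that overlap by 2*offset, clamping the final window to the border
    exactly as the prediction loop does."""
    stride = window - 2 * offset
    n = img_len // stride + (1 if img_len % stride else 0)
    for i in range(n):
        start, end = i * stride, i * stride + window
        if end > img_len:  # clamp the final window to the image border
            start, end = img_len - window, img_len
        yield start, end
```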
index_y_u = img_h + index_y_d = img_h - height + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + label_p_pred = model_region.predict( + img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + + seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset, + :] = seg_color + + prediction_true = prediction_true.astype(np.uint8) + session_region.close() + + del model_region + del session_region + gc.collect() + return prediction_true + + def get_text_region_contours_and_boxes(self, image): + rgb_class = (1, 1, 1) + mask = np.all(image == rgb_class, axis=-1) + + image = np.repeat(mask[:, :, np.newaxis], 3, axis=2) * 255 + image = image.astype(np.uint8) + + image = cv2.morphologyEx(image, cv2.MORPH_OPEN, self.kernel) + image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, self.kernel) + # image = cv2.erode(image,self.kernel,iterations = 3) + + # image = cv2.dilate(image,self.kernel,iterations = 3) + + imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + # commenst_contours=self.filter_contours_area_of_image(thresh,contours,hirarchy,max_area=0.0002,min_area=0.0001) + main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.0001) + + img_comm = np.zeros(thresh.shape) + img_comm_in = cv2.fillPoly(img_comm, pts=main_contours, color=(255, 255, 255)) + + img_comm_in = np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2) + + img_comm_in = img_comm_in.astype(np.uint8) + # img_comm_in_de=self.deskew_images(img_comm_in) + + imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) + + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + boxes = [] + contours_new = [] + for jj in range(len(contours)): + if hirarchy[0][jj][2] == -1: + x, y, w, h = cv2.boundingRect(contours[jj]) + boxes.append([x, y, w, h]) + contours_new.append(contours[jj]) + + return boxes, contours_new + + def get_all_image_patches_based_on_text_regions(self, boxes, image_page): + self.all_text_images = [] + self.all_box_coord = [] + for jk in range(len(boxes)): + crop_img, crop_coor = self.crop_image_inside_box(boxes[jk], image_page) + self.all_text_images.append(crop_img) + self.all_box_coord.append(crop_coor) + + def textline_contours(self, img): + model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) + img_height_textline = model_textline.layers[len(model_textline.layers) - 1].output_shape[1] + img_width_textline = model_textline.layers[len(model_textline.layers) - 1].output_shape[2] + n_classes = model_textline.layers[len(model_textline.layers) - 1].output_shape[3] + + img_org = img.copy() + + if img.shape[0] < img_height_textline: + img = cv2.resize(img, (img.shape[1], img_width_textline), interpolation=cv2.INTER_NEAREST) + + if img.shape[1] < img_width_textline: + img = cv2.resize(img, (img_height_textline, img.shape[0]), interpolation=cv2.INTER_NEAREST) + + margin 
= False + if not margin: + + width = img_width_textline + height = img_height_textline + + img = self.otsu_copy(img) + img = img.astype(np.uint8) + # for _ in range(4): + # img = cv2.medianBlur(img,5) + img = img / 255.0 + + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width) + nyf = img_h / float(height) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + if nyf > int(nyf): + nyf = int(nyf) + 1 + else: + nyf = int(nyf) + + for i in range(nxf): + for j in range(nyf): + index_x_d = i * width + index_x_u = (i + 1) * width + + index_y_d = j * height + index_y_u = (j + 1) * height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - height + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + label_p_pred = model_textline.predict( + img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color = self.color_images(seg, n_classes) + mask_true[index_y_d:index_y_u, index_x_d:index_x_u] = seg + prediction_true[index_y_d:index_y_u, index_x_d:index_x_u, :] = seg_color + + y_predi = mask_true + y_predi = cv2.resize(y_predi, (img_org.shape[1], img_org.shape[0]), interpolation=cv2.INTER_NEAREST) + return y_predi + + def get_textlines_for_each_textregions(self, textline_mask_tot, boxes): + textline_mask_tot = cv2.erode(textline_mask_tot, self.kernel, iterations=1) + self.area_of_cropped = [] + self.all_text_region_raw = [] + for jk in range(len(boxes)): + crop_img, crop_coor = self.crop_image_inside_box(boxes[jk], + np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) + self.all_text_region_raw.append(crop_img[:, :, 0]) + self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1]) + + def seperate_lines(self, mada, contour_text_interest, thetha): + (h, w) = mada.shape[:2] + center = (w // 2, h // 2) + M = cv2.getRotationMatrix2D(center, -thetha, 1.0) + x_d = M[0, 2] + y_d = M[1, 2] + + thetha = thetha / 180. 
* np.pi + rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]]) + contour_text_interest_copy = contour_text_interest.copy() + + x_cont = contour_text_interest[:, 0, 0] + y_cont = contour_text_interest[:, 0, 1] + x_cont = x_cont - np.min(x_cont) + y_cont = y_cont - np.min(y_cont) + + x_min_cont = 0 + x_max_cont = mada.shape[1] + y_min_cont = 0 + y_max_cont = mada.shape[0] + + xv = np.linspace(x_min_cont, x_max_cont, 1000) + + mada_n = mada.sum(axis=1) + + first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) + + y = mada_n[:] # [first_nonzero:last_nonzero] + y_help = np.zeros(len(y) + 40) + y_help[20:len(y) + 20] = y + x = np.array(range(len(y))) + + peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) + if len(peaks_real)<=2 and len(peaks_real)>1: + sigma_gaus=10 + else: + sigma_gaus=8 + + + z= gaussian_filter1d(y_help, sigma_gaus) + zneg_rev=-y_help+np.max(y_help) + zneg=np.zeros(len(zneg_rev)+40) + zneg[20:len(zneg_rev)+20]=zneg_rev + zneg= gaussian_filter1d(zneg, sigma_gaus) + + peaks, _ = find_peaks(z, height=0) + peaks_neg, _ = find_peaks(zneg, height=0) + + peaks_neg = peaks_neg - 20 - 20 + peaks = peaks - 20 + + for jj in range(len(peaks_neg)): + if peaks_neg[jj] > len(x) - 1: + peaks_neg[jj] = len(x) - 1 + + for jj in range(len(peaks)): + if peaks[jj] > len(x) - 1: + peaks[jj] = len(x) - 1 + + textline_boxes = [] + textline_boxes_rot = [] + + if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3: + for jj in range(len(peaks)): + dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) + dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) + + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + + point_down_narrow = peaks[jj] + first_nonzero + int( + 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + + if point_down >= mada.shape[0]: + point_down = mada.shape[0] - 2 + + if point_down_narrow >= mada.shape[0]: + point_down_narrow = mada.shape[0] - 2 + + distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) + for mj in range(len(xv))] + distances = np.array(distances) + + xvinside = xv[distances >= 0] + + if len(xvinside) == 0: + x_min = x_min_cont + x_max = x_max_cont + else: + x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) + x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) + + p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) + p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) + p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)]) + p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)]) + + x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d + x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d + x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d + x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) + + elif len(peaks) < 1: + pass + + elif len(peaks) == 1: + x_min = x_min_cont + x_max = x_max_cont + + y_min = y_min_cont + y_max = y_max_cont + + p1 = np.dot(rotation_matrix, [int(x_min), 
int(y_min)]) + p2 = np.dot(rotation_matrix, [int(x_max), int(y_min)]) + p3 = np.dot(rotation_matrix, [int(x_max), int(y_max)]) + p4 = np.dot(rotation_matrix, [int(x_min), int(y_max)]) + + x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d + x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d + x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d + x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + + textline_boxes.append(np.array([[int(x_min), int(y_min)], + [int(x_max), int(y_min)], + [int(x_max), int(y_max)], + [int(x_min), int(y_max)]])) + + + + elif len(peaks) == 2: + dis_to_next = np.abs(peaks[1] - peaks[0]) + for jj in range(len(peaks)): + if jj == 0: + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + if point_up < 0: + point_up = 1 + point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + elif jj == 1: + point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + if point_down >= mada.shape[0]: + point_down = mada.shape[0] - 2 + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + + distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) + for mj in range(len(xv))] + distances = np.array(distances) + + xvinside = xv[distances >= 0] + + if len(xvinside) == 0: + x_min = x_min_cont + x_max = x_max_cont + else: + x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) + x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) + + p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) + p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) + p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)]) + p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)]) + + x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d + x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d + x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d + x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) + else: + for jj in range(len(peaks)): + + if jj == 0: + dis_to_next = peaks[jj + 1] - peaks[jj] + # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + if point_up < 0: + point_up = 1 + # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) + point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + elif jj == len(peaks) - 1: + dis_to_next = peaks[jj] - peaks[jj - 1] + # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) + point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + if point_down >= mada.shape[0]: + point_down = mada.shape[0] - 2 + # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + else: + dis_to_next_down = peaks[jj + 1] - peaks[jj] + dis_to_next_up = peaks[jj] - peaks[jj - 1] + + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next_up) + point_down = peaks[jj] + first_nonzero + int(1. 
/ 1.9 * dis_to_next_down) + + distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) + for mj in range(len(xv))] + distances = np.array(distances) + + xvinside = xv[distances >= 0] + + if len(xvinside) == 0: + x_min = x_min_cont + x_max = x_max_cont + else: + x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) + x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) + + p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) + p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) + p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)]) + p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)]) + + x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d + x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d + x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d + x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) + + mada_new = np.zeros((mada.shape[0], mada.shape[1], 3)) + mada_new = cv2.fillPoly(mada_new, pts=textline_boxes, color=(255, 255, 255)) + + mada_new = mada_new.astype(np.uint8) + return mada_new, peaks, textline_boxes_rot + + def textline_contours_postprocessing(self, textline_mask, img_patch, slope, contour_text_interest, box_ind): + + textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 + + textline_mask = textline_mask.astype(np.uint8) + kernel = np.ones((5, 5), np.uint8) + textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel) + textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel) + textline_mask = cv2.erode(textline_mask, kernel, iterations=1) + imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY) + + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel) + thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel) + + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + commenst_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=0.01, + min_area=0.003) + main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003) + # interior_contours=self.filter_contours_area_of_image_interiors(thresh,contours,hirarchy,max_area=1,min_area=0) + + img_comm = np.zeros(thresh.shape) + img_comm_in = cv2.fillPoly(img_comm, pts=main_contours, color=(255, 255, 255)) + ###img_comm_in=cv2.fillPoly(img_comm, pts =interior_contours, color=(0,0,0)) + + img_comm_in = np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2) + img_comm_in = img_comm_in.astype(np.uint8) + + imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) + + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + contours_slope = contours # self.find_polugons_size_filter(contours,median_area=median_area,scaler_up=100,scaler_down=0.5) + + if len(contours_slope) > 0: + for jv in range(len(contours_slope)): + new_poly = list(contours_slope[jv]) + if jv == 0: + merged_all = new_poly + else: + merged_all = merged_all + new_poly + + merge = np.array(merged_all) + + img_in = 
np.zeros(textline_mask.shape) + img_p_in = cv2.fillPoly(img_in, pts=[merge], color=(255, 255, 255)) + + rect = cv2.minAreaRect(merge) + box = cv2.boxPoints(rect) + box = np.int0(box) + + dst = self.rotate_image(textline_mask, slope) + dst = dst[:, :, 0] + dst[dst != 0] = 1 + + contour_text_copy = contour_text_interest.copy() + + contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[ + 0] # np.min(contour_text_interest_copy[:,0,0]) + contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] + + img_contour = np.zeros((box_ind[3], box_ind[2], 3)) + img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255)) + + img_contour_rot = self.rotate_image(img_contour, slope) + + # img_comm_in=np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2) + img_contour_rot = img_contour_rot.astype(np.uint8) + imgrayrot = cv2.cvtColor(img_contour_rot, cv2.COLOR_BGR2GRAY) + _, threshrot = cv2.threshold(imgrayrot, 0, 255, 0) + contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))] + ind_big_con = np.argmax(len_con_text_rot) + + textline_maskt = textline_mask[:, :, 0] + textline_maskt[textline_maskt != 0] = 1 + + sep_img, _, contours_rotated_clean = self.seperate_lines(dst, contours_text_rot[ind_big_con], slope) + + dst = self.rotate_image(sep_img, -slope) + + imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY) + + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel) + thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel) + + found_polygons, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + img_in = np.zeros(textline_mask.shape) + img_p_in = cv2.fillPoly(img_in, pts=found_polygons, color=(255, 255, 255)) + else: + + img_in = np.zeros(textline_mask.shape) + + img_p_in = cv2.fillPoly(img_in, pts=commenst_contours, color=(255, 255, 255)) + img_p_in = cv2.dilate(img_p_in, kernel, iterations=1) + contours_rotated_clean = [] + + return img_p_in, contours_rotated_clean + + def textline_contours_to_get_slope_correctly(self, textline_mask, img_patch, contour_interest): + + slope_new = 0 # deskew_images(img_patch) + + textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 + + textline_mask = textline_mask.astype(np.uint8) + textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, self.kernel) + textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, self.kernel) + textline_mask = cv2.erode(textline_mask, self.kernel, iterations=1) + imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, self.kernel) + thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, self.kernel) + + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + # commenst_contours=self.filter_contours_area_of_image(thresh,contours,hirarchy,max_area=0.01,min_area=0.003) + main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003) + # interior_contours=self.filter_contours_area_of_image_interiors(thresh,contours,hirarchy,max_area=1,min_area=0) + + textline_maskt = textline_mask[:, :, 0] + textline_maskt[textline_maskt != 0] = 1 + + _, peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new) + + mean_dis = np.mean(np.diff(peaks_point)) + # 
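`seperate_lines()` locates line centers as peaks in the smoothed horizontal projection profile of the binary text-line mask, and `mean_dis` above is the average spacing between those peaks. A condensed sketch of that profile analysis (the padding of the profile and the adaptive sigma of 8 or 10 from the code above are omitted):

```python
import numpy as np
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d

def line_center_rows(textline_mask, sigma=8):
    """Each text line shows up as a peak in the smoothed row sums of the
    binary mask; the valleys between peaks mark the line boundaries."""
    profile = textline_mask.sum(axis=1).astype(float)
    smoothed = gaussian_filter1d(profile, sigma)
    peaks, _ = find_peaks(smoothed, height=0)
    return peaks
```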
mean_dis=np.median(np.diff(peaks_point)) + + len_x = thresh.shape[1] + # print(len_x,mean_dis,'x') + + slope_lines = [] + contours_slope_new = [] + for kk in range(len(main_contours)): + + xminh = np.min(main_contours[kk][:, 0]) + xmaxh = np.max(main_contours[kk][:, 0]) + + yminh = np.min(main_contours[kk][:, 1]) + ymaxh = np.max(main_contours[kk][:, 1]) + + # print(xminh,xmaxh ,yminh,ymaxh,ymaxh-yminh) + + if ymaxh - yminh <= mean_dis and ( + xmaxh - xminh) >= 0.3 * len_x: # xminh>=0.05*len_x and xminh<=0.4*len_x and xmaxh<=0.95*len_x and xmaxh>=0.6*len_x: + contours_slope_new.append(main_contours[kk]) + + rows, cols = thresh.shape[:2] + [vx, vy, x, y] = cv2.fitLine(main_contours[kk], cv2.DIST_L2, 0, 0.01, 0.01) + + slope_lines.append((vy / vx) / np.pi * 180) + + if len(slope_lines) >= 2: + + slope = np.mean(slope_lines) # slope_true/np.pi*180 + else: + slope = 999 + + else: + slope = 0 + + return slope + + def get_slopes_for_each_text_region(self, contours): + + # first let find the slop for biggest patch of text region + + index_max_area = np.argmax(self.area_of_cropped) + + denoised = cv2.blur(self.all_text_images[index_max_area], (5, 5)) # otsu_copy(crop_img)# + denoised = cv2.medianBlur(denoised, 5) # cv2.GaussianBlur(crop_img, (5, 5), 0) + denoised = cv2.GaussianBlur(denoised, (5, 5), 0) + denoised = self.otsu_copy(denoised) + denoised = denoised.astype(np.uint8) + slope_biggest = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[index_max_area], + denoised, contours[index_max_area]) + + if np.abs(slope_biggest) > 2.5: + slope_biggest = 0 + + self.slopes = [] + for mv in range(len(self.all_text_images)): + denoised = cv2.blur(self.all_text_images[mv], (5, 5)) # otsu_copy(crop_img)# + denoised = cv2.medianBlur(denoised, 5) # cv2.GaussianBlur(crop_img, (5, 5), 0) + denoised = cv2.GaussianBlur(denoised, (5, 5), 0) + denoised = self.otsu_copy(denoised) + denoised = denoised.astype(np.uint8) + slope_for_all = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv], denoised, + contours[mv]) + # text_patch_processed=textline_contours_postprocessing(gada) + + if np.abs(slope_for_all) > 2.5 and slope_for_all != 999: + slope_for_all = 0 + elif slope_for_all == 999: + slope_for_all = slope_biggest + self.slopes.append(slope_for_all) + + def deskew_textline_patches(self, contours, boxes): + self.all_text_region_processed = [] + self.all_found_texline_polygons = [] + + for jj in range(len(self.all_text_images)): + # print(all_text_images[jj][0,0,0],np.unique(all_text_images[jj][:,:,0])) + ###gada=self.all_text_images[jj][:,:,0] + ###gada=(gada[:,:]==0)*1 + # print(gada[0,0]) + + denoised = cv2.blur(self.all_text_images[jj], (5, 5)) # otsu_copy(crop_img)# + denoised = cv2.medianBlur(denoised, 5) # cv2.GaussianBlur(crop_img, (5, 5), 0) + denoised = cv2.GaussianBlur(denoised, (5, 5), 0) + denoised = self.otsu_copy(denoised) + denoised = denoised.astype(np.uint8) + text_patch_processed, cnt_clean_rot = self.textline_contours_postprocessing(self.all_text_region_raw[jj] + , denoised, self.slopes[jj], + contours[jj], boxes[jj]) + # text_patch_processed=textline_contours_postprocessing(gada) + self.all_text_region_processed.append(text_patch_processed) + + text_patch_processed = text_patch_processed.astype(np.uint8) + imgray = cv2.cvtColor(text_patch_processed, cv2.COLOR_BGR2GRAY) + + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + self.found_polygons, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + 
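The per-contour slope above comes from `cv2.fitLine`: the expression `(vy / vx) / np.pi * 180` applies the radian-to-degree conversion to the slope ratio rather than to the angle itself, which is a good approximation for the small skew angles involved (tan θ ≈ θ). An isolated sketch of that measurement using the exact angle:

```python
import cv2
import numpy as np

def contour_slope_deg(contour):
    # cv2.fitLine returns a unit direction vector (vx, vy) plus a point
    # on the fitted line; arctan2 gives the exact angle in degrees, where
    # the code above uses the small-angle shortcut (vy / vx) / np.pi * 180.
    vx, vy, x0, y0 = cv2.fitLine(contour, cv2.DIST_L2, 0, 0.01, 0.01)
    return float(np.degrees(np.arctan2(vy, vx))[0])
```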
####all_found_texline_polygons.append(found_polygons)cnt_clean_rot + self.all_found_texline_polygons.append(cnt_clean_rot) + + # img_v=np.zeros(text_patch_processed.shape) + # img_v=cv2.fillPoly(img_v, pts =found_polygons, color=(255,255,255)) + # sumi=np.sum(np.sum(self.all_text_images[jj],axis=2),axis=1) + + def write_into_page_xml(self, contours, page_coord, dir_of_image): + + found_polygons_text_region = contours + data = ET.Element('PcGts') + + data.set('xmlns', "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15") + data.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance") + data.set('xsi:schemaLocation', "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15") + # data.set('http',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15/pagecontent.xsd") + + metadata = ET.SubElement(data, 'Metadata') + + author = ET.SubElement(metadata, 'Creator') + author.text = 'Vahid' + + created = ET.SubElement(metadata, 'Created') + created.text = '2019-06-17T18:15:12' + + changetime = ET.SubElement(metadata, 'LastChange') + changetime.text = '2019-06-17T18:15:12' + + page = ET.SubElement(data, 'Page') + + page.set('imageFilename', self.f_name + '.tif') + page.set('imageHeight', str(self.height_org)) + page.set('imageWidth', str(self.width_org)) + + id_indexer = 0 + + for mm in range(len(found_polygons_text_region)): + textregion = ET.SubElement(page, 'TextRegion') + + textregion.set('id', 'r' + str(id_indexer)) + id_indexer += 1 + + if mm == 0: + textregion.set('type', 'heading') + else: + textregion.set('type', 'paragraph') + coord_text = ET.SubElement(textregion, 'Coords') + + points_co = '' + for lmm in range(len(found_polygons_text_region[mm])): + + if len(found_polygons_text_region[mm][lmm]) == 2: + points_co = points_co + str( + int((found_polygons_text_region[mm][lmm][0] + page_coord[2]) / self.scale_x)) + points_co = points_co + ',' + points_co = points_co + str( + int((found_polygons_text_region[mm][lmm][1] + page_coord[0]) / self.scale_y)) + else: + points_co = points_co + str( + int((found_polygons_text_region[mm][lmm][0][0] + page_coord[2]) / self.scale_x)) + points_co = points_co + ',' + points_co = points_co + str( + int((found_polygons_text_region[mm][lmm][0][1] + page_coord[0]) / self.scale_y)) + + if lmm < (len(found_polygons_text_region[mm]) - 1): + points_co = points_co + ' ' + # print(points_co) + coord_text.set('points', points_co) + + for j in range(len(self.all_found_texline_polygons[mm])): + + textline = ET.SubElement(textregion, 'TextLine') + + textline.set('id', 'l' + str(id_indexer)) + + id_indexer += 1 + + coord = ET.SubElement(textline, 'Coords') + + texteq = ET.SubElement(textline, 'TextEquiv') + + uni = ET.SubElement(texteq, 'Unicode') + uni.text = ' ' + + # points = ET.SubElement(coord, 'Points') + + points_co = '' + for l in range(len(self.all_found_texline_polygons[mm][j])): + # point = ET.SubElement(coord, 'Point') + + # point.set('x',str(found_polygons[j][l][0])) + # point.set('y',str(found_polygons[j][l][1])) + if len(self.all_found_texline_polygons[mm][j][l]) == 2: + points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0] + page_coord[2] + + self.all_box_coord[mm][2]) / self.scale_x)) + points_co = points_co + ',' + points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][1] + page_coord[0] + + self.all_box_coord[mm][0]) / self.scale_y)) + else: + points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0][0] + page_coord[2] + + self.all_box_coord[mm][2]) / 
self.scale_x)) + points_co = points_co + ',' + points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0][1] + page_coord[0] + + self.all_box_coord[mm][0]) / self.scale_y)) + + if l < (len(self.all_found_texline_polygons[mm][j]) - 1): + points_co = points_co + ' ' + # print(points_co) + coord.set('points', points_co) + + texteqreg = ET.SubElement(textregion, 'TextEquiv') + + unireg = ET.SubElement(texteqreg, 'Unicode') + unireg.text = ' ' + + tree = ET.ElementTree(data) + tree.write(dir_of_image + self.f_name + ".xml") + + def run(self): + self.get_image_and_scales() + image_page,page_coord=self.extract_page() + text_regions=self.extract_text_regions(image_page) + boxes,contours=self.get_text_region_contours_and_boxes(text_regions) + self.get_all_image_patches_based_on_text_regions(boxes,image_page) + textline_mask_tot=self.textline_contours(image_page) + + self.get_textlines_for_each_textregions(textline_mask_tot,boxes) + self.get_slopes_for_each_text_region(contours) + self.deskew_textline_patches(contours, boxes) + self.write_into_page_xml(contours, page_coord, self.dir_out) + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument('-i', '--image', dest='inp1', default=None, help='directory of image.') + parser.add_argument('-o', '--out', dest='inp2', default=None, help='directory to write output xml data.') + parser.add_argument('-m', '--model', dest='inp3', default=None, help='directory of models.') + + options = parser.parse_args() + + possibles = globals() + possibles.update(locals()) + x = textlineerkenner(options.inp1, options.inp2, options.inp3) + x.run() + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4b9bf38 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +opencv-python +numpy<=1.14.5 +matplotlib +seaborn +tqdm +keras +shapely +scikit-learn +tensorflow-gpu < 2.0 +scipy diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..0b0e9df --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +from io import open +from setuptools import find_packages, setup + +with open('requirements.txt') as fp: + install_requires = fp.read() + +setup( + name="qurator-sbb-textline", + version="0.0.1", + author="The Qurator Team", + author_email="qurator@sbb.spk-berlin.de", + description="Qurator", + long_description=open("README.md", "r", encoding='utf-8').read(), + long_description_content_type="text/markdown", + keywords='qurator', + license='Apache', + url="https://qurator.ai", + packages=find_packages(exclude=["*.tests", "*.tests.*", + "tests.*", "tests"]), + install_requires=install_requires, + entry_points={ + 'console_scripts': [ + "text_line_recognition=main:main", + ] + }, + python_requires='>=3.6.0', + tests_require=['pytest'], + classifiers=[ + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + ], +) \ No newline at end of file From 599bbf1c863175ec564a8d1468e183ff7323fa18 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 10 Oct 2019 16:24:28 +0200 Subject: [PATCH 02/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Use=20same=20structure=20as=20the=20other=20projects?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 7 ++++--- qurator/__init__.py | 1 + qurator/sbb_textline_detector/__init__.py | 1 + main.py => 
qurator/sbb_textline_detector/main.py | 0 setup.py | 4 ++-- 5 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 qurator/__init__.py create mode 100644 qurator/sbb_textline_detector/__init__.py rename main.py => qurator/sbb_textline_detector/main.py (100%) diff --git a/Dockerfile b/Dockerfile index 020db6f..20681e3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,9 @@ FROM python:3 -ADD main.py / ADD requirements.txt / - RUN pip install --proxy=http-proxy.sbb.spk-berlin.de:3128 -r requirements.txt -ENTRYPOINT ["python", "./main.py"] +COPY . /usr/src/sbb_textline_detector +RUN pip install /usr/src/sbb_textline_detector + +ENTRYPOINT ["sbb_textline_detector"] diff --git a/qurator/__init__.py b/qurator/__init__.py new file mode 100644 index 0000000..b0d6433 --- /dev/null +++ b/qurator/__init__.py @@ -0,0 +1 @@ +__import__('pkg_resources').declare_namespace(__name__) \ No newline at end of file diff --git a/qurator/sbb_textline_detector/__init__.py b/qurator/sbb_textline_detector/__init__.py new file mode 100644 index 0000000..15b6a64 --- /dev/null +++ b/qurator/sbb_textline_detector/__init__.py @@ -0,0 +1 @@ +from .main import * diff --git a/main.py b/qurator/sbb_textline_detector/main.py similarity index 100% rename from main.py rename to qurator/sbb_textline_detector/main.py diff --git a/setup.py b/setup.py index 0b0e9df..faff412 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( install_requires=install_requires, entry_points={ 'console_scripts': [ - "text_line_recognition=main:main", + "sbb_textline_detector=qurator.sbb_textline_detector:main", ] }, python_requires='>=3.6.0', @@ -31,4 +31,4 @@ setup( 'Programming Language :: Python :: 3', 'Topic :: Scientific/Engineering :: Artificial Intelligence', ], -) \ No newline at end of file +) From 91fb2e01a66ff425327c8ddbc8f29d3955cfe5f9 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 10 Oct 2019 16:25:43 +0200 Subject: [PATCH 03/47] =?UTF-8?q?=F0=9F=93=9D=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Fix=20help=20for=20input=20filename?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 1701382..b09d665 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1317,7 +1317,7 @@ class textlineerkenner: def main(): parser = argparse.ArgumentParser() - parser.add_argument('-i', '--image', dest='inp1', default=None, help='directory of image.') + parser.add_argument('-i', '--image', dest='inp1', default=None, help='image filename.') parser.add_argument('-o', '--out', dest='inp2', default=None, help='directory to write output xml data.') parser.add_argument('-m', '--model', dest='inp3', default=None, help='directory of models.') From 561a6f8a904fac0c3444dacf480dc24cf562917a Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 10 Oct 2019 16:40:48 +0200 Subject: [PATCH 04/47] =?UTF-8?q?=E2=9A=99=20sbb=5Ftextline=5Fdetector:=20?= =?UTF-8?q?Use=20click=20instead=20of=20argparse?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 20 ++++++++------------ requirements.txt | 1 + 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index b09d665..c7d47e2 100644 --- 
a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -23,7 +23,7 @@ from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d import xml.etree.ElementTree as ET import warnings -import argparse +import click with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -1314,18 +1314,14 @@ class textlineerkenner: self.write_into_page_xml(contours, page_coord, self.dir_out) -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument('-i', '--image', dest='inp1', default=None, help='image filename.') - parser.add_argument('-o', '--out', dest='inp2', default=None, help='directory to write output xml data.') - parser.add_argument('-m', '--model', dest='inp3', default=None, help='directory of models.') - - options = parser.parse_args() - - possibles = globals() +@click.command() +@click.option('--image', '-i', help='image filename', type=click.Path(exists=True, dir_okay=False)) +@click.option('--out', '-o', help='directory to write output xml data', type=click.Path(exists=True, file_okay=False)) +@click.option('--model', '-m', help='directory of models', type=click.Path(exists=True, file_okay=False)) +def main(image, out, model): + possibles = globals() # XXX unused? possibles.update(locals()) - x = textlineerkenner(options.inp1, options.inp2, options.inp3) + x = textlineerkenner(image, out, model) x.run() diff --git a/requirements.txt b/requirements.txt index 4b9bf38..58d61f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ shapely scikit-learn tensorflow-gpu < 2.0 scipy +click From 0c915c75de3735eb270790c9a6ffd052c11d930d Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 10 Oct 2019 17:54:42 +0200 Subject: [PATCH 05/47] =?UTF-8?q?=E2=9C=A8=20sbb=5Ftextline=5Fdetector:=20?= =?UTF-8?q?Add=20a=20OCR-D=20interface?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrd-tool.json | 1 + qurator/sbb_textline_detector/__init__.py | 1 + qurator/sbb_textline_detector/main.py | 23 ++++---- qurator/sbb_textline_detector/ocrd-tool.json | 19 ++++++ qurator/sbb_textline_detector/ocrd_cli.py | 61 ++++++++++++++++++++ requirements.txt | 3 +- setup.py | 1 + 7 files changed, 98 insertions(+), 11 deletions(-) create mode 120000 ocrd-tool.json create mode 100644 qurator/sbb_textline_detector/ocrd-tool.json create mode 100644 qurator/sbb_textline_detector/ocrd_cli.py diff --git a/ocrd-tool.json b/ocrd-tool.json new file mode 120000 index 0000000..a1e5650 --- /dev/null +++ b/ocrd-tool.json @@ -0,0 +1 @@ +qurator/sbb_textline_detector/ocrd-tool.json \ No newline at end of file diff --git a/qurator/sbb_textline_detector/__init__.py b/qurator/sbb_textline_detector/__init__.py index 15b6a64..b7c0712 100644 --- a/qurator/sbb_textline_detector/__init__.py +++ b/qurator/sbb_textline_detector/__init__.py @@ -1 +1,2 @@ from .main import * +from .ocrd_cli import * diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index c7d47e2..d775aa5 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -35,16 +35,18 @@ __doc__ = \ class textlineerkenner: - def __init__(self, image_dir, dir_out, dir_models): + def __init__(self, image_dir, dir_out, f_name, dir_models): self.image_dir = image_dir self.dir_out = dir_out + self.f_name = f_name + if self.f_name is None: + try: + self.f_name = image_dir.split('/')[len(image_dir.split('/')) - 1] + self.f_name = self.f_name.split('.')[0] + print(self.f_name) + except: 
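The fallback strips the directory and extension by string splitting; note that the `except` branch that follows reads `self.f_name` while it is still `None`, so it would raise again. An `os.path` equivalent of the intended behaviour (a sketch, not part of the patch):

```python
import os

def derive_f_name(image_path):
    # Equivalent of the try branch: basename without its extension,
    # e.g. "/data/scan_0001.tif" -> "scan_0001".
    return os.path.splitext(os.path.basename(image_path))[0]
```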
+ self.f_name = self.f_name.split('.')[0] self.dir_models = dir_models - try: - self.f_name = image_dir.split('/')[len(image_dir.split('/')) - 1] - self.f_name = self.f_name.split('.')[0] - print(self.f_name) - except: - self.f_name = self.f_name.split('.')[0] self.kernel = np.ones((5, 5), np.uint8) self.model_page_dir = dir_models + '/model_page.h5' self.model_region_dir = dir_models + '/model_strukturerkennung.h5' @@ -365,6 +367,7 @@ class textlineerkenner: return img_r def get_image_and_scales(self): + print(self.image_dir) self.image = cv2.imread(self.image_dir) self.height_org = self.image.shape[0] self.width_org = self.image.shape[1] @@ -1298,7 +1301,7 @@ class textlineerkenner: unireg.text = ' ' tree = ET.ElementTree(data) - tree.write(dir_of_image + self.f_name + ".xml") + tree.write(os.path.join(dir_of_image, self.f_name) + ".xml") def run(self): self.get_image_and_scales() @@ -1307,7 +1310,7 @@ class textlineerkenner: boxes,contours=self.get_text_region_contours_and_boxes(text_regions) self.get_all_image_patches_based_on_text_regions(boxes,image_page) textline_mask_tot=self.textline_contours(image_page) - + self.get_textlines_for_each_textregions(textline_mask_tot,boxes) self.get_slopes_for_each_text_region(contours) self.deskew_textline_patches(contours, boxes) @@ -1321,7 +1324,7 @@ class textlineerkenner: def main(image, out, model): possibles = globals() # XXX unused? possibles.update(locals()) - x = textlineerkenner(image, out, model) + x = textlineerkenner(image, out, None, model) x.run() diff --git a/qurator/sbb_textline_detector/ocrd-tool.json b/qurator/sbb_textline_detector/ocrd-tool.json new file mode 100644 index 0000000..b76f439 --- /dev/null +++ b/qurator/sbb_textline_detector/ocrd-tool.json @@ -0,0 +1,19 @@ +{ + "version": "0.0.1", + "tools": { + "ocrd_sbb_textline_detector": { + "executable": "ocrd_sbb_textline_detector", + "description": "Detect lines", + "steps": ["layout/segmentation/line"], + "input_file_grp": [ + "OCR-D-IMG" + ], + "output_file_grp": [ + "OCR-D-SBB-SEG-LINE" + ], + "parameters": { + "model": {"type": "string", "format": "file", "cacheable": true} + } + } + } +} diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py new file mode 100644 index 0000000..854a5dc --- /dev/null +++ b/qurator/sbb_textline_detector/ocrd_cli.py @@ -0,0 +1,61 @@ +import json +import os + +import click +from ocrd import Processor +from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor +from ocrd_utils import concat_padded, getLogger +from pkg_resources import resource_string + +from qurator.sbb_textline_detector import textlineerkenner + +log = getLogger('processor.OcrdSbbTextlineDetectorRecognize') + +OCRD_TOOL = json.loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) + + +@click.command() +@ocrd_cli_options +def ocrd_sbb_textline_detector(*args, **kwargs): + return ocrd_cli_wrap_processor(OcrdSbbTextlineDetectorRecognize, *args, **kwargs) + + +class OcrdSbbTextlineDetectorRecognize(Processor): + + def __init__(self, *args, **kwargs): + kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd_sbb_textline_detector'] + super(OcrdSbbTextlineDetectorRecognize, self).__init__(*args, **kwargs) + + def _make_file_id(self, input_file, input_file_grp, n): + file_id = input_file.ID.replace(input_file_grp, self.output_file_grp) + if file_id == input_file.ID: + file_id = concat_padded(self.output_file_grp, n) + return file_id + + def process(self): + for n, page_id in enumerate(self.workspace.mets.physical_pages): + 
image_file = self.workspace.mets.find_files(fileGrp=self.input_file_grp, pageId=page_id)[0] + log.info("INPUT FILE %i / %s", n, image_file) + + file_id = self._make_file_id(image_file, self.output_file_grp, n) + + # Process the files + try: + os.mkdir(self.output_file_grp) + except FileExistsError: + pass + + model = self.parameter['model'] + x = textlineerkenner(image_file.local_filename, self.output_file_grp, file_id, model) + x.run() + + self.workspace.add_file( + ID=file_id + '.xml', + file_grp=self.output_file_grp, + pageId=page_id, + mimetype='application/vnd.prima.page+xml', + local_filename=self.output_file_grp + '/' + file_id) + + +if __name__ == '__main__': + ocrd_sbb_textline_detector() diff --git a/requirements.txt b/requirements.txt index 58d61f7..acc0101 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ opencv-python -numpy<=1.14.5 +numpy matplotlib seaborn tqdm @@ -9,3 +9,4 @@ scikit-learn tensorflow-gpu < 2.0 scipy click +ocrd >= 1.0.0b19 diff --git a/setup.py b/setup.py index faff412..26eff52 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ setup( entry_points={ 'console_scripts': [ "sbb_textline_detector=qurator.sbb_textline_detector:main", + "ocrd_sbb_textline_detector=qurator.sbb_textline_detector:ocrd_sbb_textline_detector", ] }, python_requires='>=3.6.0', From 5fd04677f9871675bc5babee46e562d035e2021b Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Thu, 10 Oct 2019 18:31:29 +0200 Subject: [PATCH 06/47] =?UTF-8?q?=F0=9F=90=9B=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Fix=20filenames=20of=20created=20OCR-D=20file=20group?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/ocrd_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py index 854a5dc..8df0baf 100644 --- a/qurator/sbb_textline_detector/ocrd_cli.py +++ b/qurator/sbb_textline_detector/ocrd_cli.py @@ -54,7 +54,7 @@ class OcrdSbbTextlineDetectorRecognize(Processor): file_grp=self.output_file_grp, pageId=page_id, mimetype='application/vnd.prima.page+xml', - local_filename=self.output_file_grp + '/' + file_id) + local_filename=os.path.join(self.output_file_grp, file_id) + '.xml') if __name__ == '__main__': From b960d000180d30b6ded20bd674cc905fcf4a3ec4 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 11 Oct 2019 13:12:32 +0200 Subject: [PATCH 07/47] =?UTF-8?q?=F0=9F=9A=A7=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20XXX=20image=5Fdir=20is=20probably=20a=20file,=20not=20dir?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index d775aa5..62fbc93 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -36,7 +36,7 @@ __doc__ = \ class textlineerkenner: def __init__(self, image_dir, dir_out, f_name, dir_models): - self.image_dir = image_dir + self.image_dir = image_dir # XXX This does not seem to be a directory as the name suggests, but a file self.dir_out = dir_out self.f_name = f_name if self.f_name is None: From d5a020fb3a252651c88994d87e4a381aba87bd3a Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 11 Oct 2019 13:13:25 +0200 Subject: [PATCH 08/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:?= 
=?UTF-8?q?=20Remove=20debug=20print()s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 62fbc93..dfe47ec 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -43,7 +43,6 @@ class textlineerkenner: try: self.f_name = image_dir.split('/')[len(image_dir.split('/')) - 1] self.f_name = self.f_name.split('.')[0] - print(self.f_name) except: self.f_name = self.f_name.split('.')[0] self.dir_models = dir_models @@ -367,7 +366,6 @@ class textlineerkenner: return img_r def get_image_and_scales(self): - print(self.image_dir) self.image = cv2.imread(self.image_dir) self.height_org = self.image.shape[0] self.width_org = self.image.shape[1] From 1c7d45d3d028e0d4f71681733d57df965b2a09af Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 11 Oct 2019 13:14:57 +0200 Subject: [PATCH 09/47] =?UTF-8?q?=E2=99=BB=20sbb=5Ftextline=5Fdetector:=20?= =?UTF-8?q?Remove=20redundant=20and=20wrongly=20named=20parameter=20dir=5F?= =?UTF-8?q?of=5Fimage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index dfe47ec..affb1c0 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1190,7 +1190,7 @@ class textlineerkenner: # img_v=cv2.fillPoly(img_v, pts =found_polygons, color=(255,255,255)) # sumi=np.sum(np.sum(self.all_text_images[jj],axis=2),axis=1) - def write_into_page_xml(self, contours, page_coord, dir_of_image): + def write_into_page_xml(self, contours, page_coord): found_polygons_text_region = contours data = ET.Element('PcGts') @@ -1299,7 +1299,7 @@ class textlineerkenner: unireg.text = ' ' tree = ET.ElementTree(data) - tree.write(os.path.join(dir_of_image, self.f_name) + ".xml") + tree.write(os.path.join(self.dir_out, self.f_name) + ".xml") def run(self): self.get_image_and_scales() @@ -1312,7 +1312,7 @@ class textlineerkenner: self.get_textlines_for_each_textregions(textline_mask_tot,boxes) self.get_slopes_for_each_text_region(contours) self.deskew_textline_patches(contours, boxes) - self.write_into_page_xml(contours, page_coord, self.dir_out) + self.write_into_page_xml(contours, page_coord) @click.command() From b4bef6460c048a64ebbce6ccf2e94c6f23cebd9d Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 11 Oct 2019 13:15:33 +0200 Subject: [PATCH 10/47] =?UTF-8?q?=F0=9F=90=9B=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Use=20the=20correct=20image=20filename=20in=20the=20output?= =?UTF-8?q?=20PAGE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index affb1c0..fdd5308 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1213,7 +1213,7 @@ class textlineerkenner: page = ET.SubElement(data, 'Page') - page.set('imageFilename', self.f_name + '.tif') + page.set('imageFilename', self.image_dir) page.set('imageHeight', str(self.height_org)) page.set('imageWidth', str(self.width_org)) From 
2199bf0d8c3787733e3f18ebca3de2e7b4db8f53 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 11 Oct 2019 14:37:51 +0200 Subject: [PATCH 11/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Remove=20extra=20.xml=20suffix=20from=20METS=20file=20id?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/ocrd_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py index 8df0baf..029c185 100644 --- a/qurator/sbb_textline_detector/ocrd_cli.py +++ b/qurator/sbb_textline_detector/ocrd_cli.py @@ -50,7 +50,7 @@ class OcrdSbbTextlineDetectorRecognize(Processor): x.run() self.workspace.add_file( - ID=file_id + '.xml', + ID=file_id, file_grp=self.output_file_grp, pageId=page_id, mimetype='application/vnd.prima.page+xml', From c4d0d98ebf9df4e485616f9de43b8d3eb95e3e27 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 11 Oct 2019 16:18:10 +0200 Subject: [PATCH 12/47] =?UTF-8?q?=F0=9F=90=9B=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Install=20*.json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index 26eff52..1c9075f 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,9 @@ setup( packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), install_requires=install_requires, + package_data={ + '': ['*.json'], + }, entry_points={ 'console_scripts': [ "sbb_textline_detector=qurator.sbb_textline_detector:main", From 419beed83685027f37bc0292400ac933a0b141dd Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Tue, 15 Oct 2019 13:24:27 +0200 Subject: [PATCH 13/47] Update main.py --- qurator/sbb_textline_detector/main.py | 259 +++++++++++++++++++++++++- 1 file changed, 256 insertions(+), 3 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index fdd5308..ba1855a 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1155,7 +1155,93 @@ class textlineerkenner: elif slope_for_all == 999: slope_for_all = slope_biggest self.slopes.append(slope_for_all) + def order_of_regions(self, textline_mask,contours_main): + mada_n=textline_mask.sum(axis=1) + y=mada_n[:] + y_help=np.zeros(len(y)+40) + y_help[20:len(y)+20]=y + x=np.array( range(len(y)) ) + + + peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) + + + sigma_gaus=8 + z= gaussian_filter1d(y_help, sigma_gaus) + zneg_rev=-y_help+np.max(y_help) + + zneg=np.zeros(len(zneg_rev)+40) + zneg[20:len(zneg_rev)+20]=zneg_rev + zneg= gaussian_filter1d(zneg, sigma_gaus) + + + peaks, _ = find_peaks(z, height=0) + peaks_neg, _ = find_peaks(zneg, height=0) + + peaks_neg=peaks_neg-20-20 + peaks=peaks-20 + + if contours_main!=None: + areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) + M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] + cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] + cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] + x_min_main=np.array([np.min(contours_main[j][:,0,0]) for j in range(len(contours_main))]) + x_max_main=np.array([np.max(contours_main[j][:,0,0]) for j in range(len(contours_main))]) + + y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in 
range(len(contours_main))])
+            y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))])
+
+
+        if contours_main!=None:
+            indexer_main=np.array(range(len(contours_main)))
+
+        if contours_main!=None:
+            len_main=len(contours_main)
+        else:
+            len_main=0
+
+        matrix_of_orders=np.zeros((len_main,5))
+        matrix_of_orders[:,0]=np.array( range( len_main ) )
+        matrix_of_orders[:len_main,1]=1
+        matrix_of_orders[len_main:,1]=2
+        matrix_of_orders[:len_main,2]=cx_main
+        matrix_of_orders[:len_main,3]=cy_main
+        matrix_of_orders[:len_main,4]=np.array( range( len_main ) )
+
+        final_indexers_sorted=[]
+        for i in range(len(peaks_neg)-1):
+            top=peaks_neg[i]
+            down=peaks_neg[i+1]
+
+            indexes_in=matrix_of_orders[:,0][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))]
+            cxs_in=matrix_of_orders[:,2][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))]
[…]

From […] Mon Sep 17 00:00:00 2001
From: "Rezanezhad, Vahid"
Date: Tue, 15 Oct 2019 14:03:09 +0200
Subject: [PATCH 14/47] Update main.py

---
 qurator/sbb_textline_detector/main.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index ba1855a..e182ed2 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -1275,6 +1275,7 @@ class textlineerkenner:
         # img_v=np.zeros(text_patch_processed.shape)
         # img_v=cv2.fillPoly(img_v, pts =found_polygons, color=(255,255,255))
         # sumi=np.sum(np.sum(self.all_text_images[jj],axis=2),axis=1)
+    """
 
     def write_into_page_xml(self,contours,page_coord,dir_of_image,order_of_texts , id_of_texts):
 
         found_polygons_text_region=contours
@@ -1314,7 +1315,7 @@
         page.set('textLineOrder',"top-to-bottom" )
 
-        """
+
         page_print_sub=ET.SubElement(page, 'PrintSpace')
         coord_page = ET.SubElement(page_print_sub, 'Coords')
         points_page_print=''
@@ -1333,7 +1334,7 @@
             points_page_print=points_page_print+' '
         #print(points_co)
         coord_page.set('points',points_page_print)
-        """
+
@@ -1550,7 +1551,7 @@
 
         tree = ET.ElementTree(data)
         tree.write(os.path.join(self.dir_out, self.f_name) + ".xml")
-        """
+
     def run(self):
         self.get_image_and_scales()
         image_page,page_coord=self.extract_page()
@@ -1559,13 +1560,13 @@
         self.get_all_image_patches_based_on_text_regions(boxes,image_page)
         textline_mask_tot=self.textline_contours(image_page)
 
-        indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours)
-        order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted )
+        #indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours)
+        #order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted )
 
         self.get_textlines_for_each_textregions(textline_mask_tot,boxes)
         self.get_slopes_for_each_text_region(contours)
         self.deskew_textline_patches(contours, boxes)
-        self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts)
+        self.write_into_page_xml(contours,page_coord,self.dir_out )
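Patch 13 above (refined by patch 15 below) derives the reading order of text regions from the textline mask: the mask's horizontal projection profile is smoothed with gaussian_filter1d, find_peaks on the inverted profile yields the valleys between bands of text lines, and regions are then ordered top-to-bottom by band and left-to-right by centroid x within each band. A condensed sketch of that banding idea, assuming a binary numpy mask and a list of region centroids (the names reading_order_sketch, band_edges and centers_xy are illustrative, not taken from the patch):

    from scipy.ndimage import gaussian_filter1d
    from scipy.signal import find_peaks

    def reading_order_sketch(textline_mask, centers_xy, sigma=8):
        # One value per image row: how much textline mask that row contains.
        profile = textline_mask.sum(axis=1).astype(float)
        # Valleys of the smoothed profile separate horizontal bands of lines.
        valleys, _ = find_peaks(-gaussian_filter1d(profile, sigma))
        band_edges = [0, *valleys.tolist(), textline_mask.shape[0]]
        order = []
        for top, down in zip(band_edges, band_edges[1:]):
            # Regions whose centroid falls into this band, left to right.
            in_band = [i for i, (cx, cy) in enumerate(centers_xy) if top <= cy < down]
            order.extend(sorted(in_band, key=lambda i: centers_xy[i][0]))
        return order

Patch 15's peaks_neg_new list, which prepends 0 and appends the image height to the detected valleys, corresponds to the band_edges construction in this sketch.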
From 170247240145273162175e4693069a23bd86930b Mon Sep 17 00:00:00 2001
From: "Rezanezhad, Vahid"
Date: Tue, 15 Oct 2019 14:32:40 +0200
Subject: [PATCH 15/47] Update main.py

---
 qurator/sbb_textline_detector/main.py | 53 ++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index e182ed2..1e58586 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -1155,6 +1155,7 @@ class textlineerkenner:
         elif slope_for_all == 999:
             slope_for_all = slope_biggest
         self.slopes.append(slope_for_all)
+
     def order_of_regions(self, textline_mask,contours_main):
         mada_n=textline_mask.sum(axis=1)
         y=mada_n[:]
@@ -1168,6 +1169,7 @@ class textlineerkenner:
 
         sigma_gaus=8
+
         z= gaussian_filter1d(y_help, sigma_gaus)
         zneg_rev=-y_help+np.max(y_help)
@@ -1182,6 +1184,8 @@ class textlineerkenner:
         peaks_neg=peaks_neg-20-20
         peaks=peaks-20
+
+
         if contours_main!=None:
             areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
             M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))]
@@ -1192,38 +1196,66 @@ class textlineerkenner:
             y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))])
             y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))])
+            #print(contours_main[0],np.shape(contours_main[0]),contours_main[0][:,0,0])
+
+
         if contours_main!=None:
             indexer_main=np.array(range(len(contours_main)))
+
         if contours_main!=None:
             len_main=len(contours_main)
         else:
             len_main=0
+
         matrix_of_orders=np.zeros((len_main,5))
+
         matrix_of_orders[:,0]=np.array( range( len_main ) )
+
         matrix_of_orders[:len_main,1]=1
         matrix_of_orders[len_main:,1]=2
+
         matrix_of_orders[:len_main,2]=cx_main
+
+
         matrix_of_orders[:len_main,3]=cy_main
+
+
+        matrix_of_orders[:len_main,4]=np.array( range( len_main ) )
+        #matrix_of_orders[len_main:,4]=np.array( range( len_head ) )
+
+        #print(matrix_of_orders)
+
+        peaks_neg_new=[]
+
+        peaks_neg_new.append(0)
+        for iii in range(len(peaks_neg)):
+            peaks_neg_new.append(peaks_neg[iii])
+
+        peaks_neg_new.append(textline_mask.shape[0])
+
         final_indexers_sorted=[]
-        for i in range(len(peaks_neg)-1):
-            top=peaks_neg[i]
-            down=peaks_neg[i+1]
+        for i in range(len(peaks_neg_new)-1):
+            top=peaks_neg_new[i]
+            down=peaks_neg_new[i+1]
 
             indexes_in=matrix_of_orders[:,0][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))]
             cxs_in=matrix_of_orders[:,2][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))]
[…]

From […] Mon Sep 17 00:00:00 2001
Date: Tue, 15 Oct 2019 18:08:47 +0200
Subject: [PATCH 16/47] =?UTF-8?q?=F0=9F=90=9B=20sbb=5Ftextline=5Fdetector:?=
 =?UTF-8?q?=20Use=20the=20correct=20image=20filename=20in=20the=20output?=
 =?UTF-8?q?=20PAGE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 qurator/sbb_textline_detector/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index 1e58586..9c1ba35 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -1339,7 +1339,7 @@ class textlineerkenner:
 
         page=ET.SubElement(data,'Page')
 
-        page.set('imageFilename',self.f_name+'.tif')
+        page.set('imageFilename', self.image_dir)
         page.set('imageHeight',str(self.height_org) )
         page.set('imageWidth',str(self.width_org) )
         page.set('type',"content")

From 4201fa7d0f48b1864322db3b2c23d642ff06faf7 Mon Sep 17 00:00:00 2001
From: cneud
Date: Wed, 16 Oct 2019 18:52:35 +0200
Subject: [PATCH 17/47] sbb_textline_detector: typo (polugons --> polygons)

---
 qurator/sbb_textline_detector/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index 9c1ba35..6a1fce4 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -51,7 +51,7 @@ class textlineerkenner:
         self.model_region_dir = dir_models + '/model_strukturerkennung.h5'
         self.model_textline_dir = dir_models +
'/model_textline.h5' - def find_polugons_size_filter(self, contours, median_area, scaler_up=1.2, scaler_down=0.8): + def find_polygons_size_filter(self, contours, median_area, scaler_up=1.2, scaler_down=0.8): found_polygons_early = list() for c in contours: @@ -986,7 +986,7 @@ class textlineerkenner: contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - contours_slope = contours # self.find_polugons_size_filter(contours,median_area=median_area,scaler_up=100,scaler_down=0.5) + contours_slope = contours # self.find_polygons_size_filter(contours,median_area=median_area,scaler_up=100,scaler_down=0.5) if len(contours_slope) > 0: for jv in range(len(contours_slope)): From 5d440857e7b3ac270496396920d134232f9cda66 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 18 Oct 2019 10:59:01 +0200 Subject: [PATCH 18/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Delete=20textline=20session/model=20after=20using=20it?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 9c1ba35..f489595 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -683,6 +683,12 @@ class textlineerkenner: y_predi = mask_true y_predi = cv2.resize(y_predi, (img_org.shape[1], img_org.shape[0]), interpolation=cv2.INTER_NEAREST) + + session_textline.close() + + del model_textline + del session_textline + gc.collect() return y_predi def get_textlines_for_each_textregions(self, textline_mask_tot, boxes): From 7884ab93c614daa06bc43a6d6cc6cae13c1e7aee Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 18 Oct 2019 10:59:41 +0200 Subject: [PATCH 19/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Destroy=20Keras=20session=20at=20the=20end=20of=20a=20run()?= =?UTF-8?q?=20to=20free=20up=20memory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index f489595..9c348de 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1607,6 +1607,9 @@ class textlineerkenner: self.deskew_textline_patches(contours, boxes) self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts) + # Destroy the current Keras session/graph to free memory + K.clear_session() + @click.command() @click.option('--image', '-i', help='image filename', type=click.Path(exists=True, dir_okay=False)) From 9eda8749852dc830048006487be5ff5de06b984e Mon Sep 17 00:00:00 2001 From: Kai Labusch Date: Sat, 19 Oct 2019 11:15:09 +0200 Subject: [PATCH 20/47] add missing requirement --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index acc0101..9092a51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ tensorflow-gpu < 2.0 scipy click ocrd >= 1.0.0b19 +flair From 0b7bc8d93e6fe4973e50d3606aa33e3097730283 Mon Sep 17 00:00:00 2001 From: Kai Labusch Date: Sat, 19 Oct 2019 11:15:59 +0200 Subject: [PATCH 21/47] add missing requirement --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9092a51..3bcc5bc 
100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,4 @@ tensorflow-gpu < 2.0 scipy click ocrd >= 1.0.0b19 -flair + From 47d972b4594727afeec57d4289aa29e2bff19d9a Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Tue, 22 Oct 2019 13:27:23 +0200 Subject: [PATCH 22/47] Update main.py --- qurator/sbb_textline_detector/main.py | 195 +++++++++++++------------- 1 file changed, 100 insertions(+), 95 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 612657a..1cc88ce 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1374,103 +1374,102 @@ class textlineerkenner: coord_page.set('points',points_page_print) """ - - - region_order=ET.SubElement(page, 'ReadingOrder') - region_order_sub = ET.SubElement(region_order, 'OrderedGroup') - - region_order_sub.set('id',"ro357564684568544579089") - - args_sort=np.argsort(order_of_texts) - for vj in args_sort: - name="coord_text_"+str(vj) - name = ET.SubElement(region_order_sub, 'RegionRefIndexed') - name.set('index',str(order_of_texts[vj]) ) - name.set('regionRef',id_of_texts[vj]) - - - id_indexer=0 - id_indexer_l=0 - - for mm in range(len(found_polygons_text_region)): - textregion=ET.SubElement(page, 'TextRegion') - - textregion.set('id','r'+str(id_indexer)) - id_indexer+=1 - - textregion.set('type','paragraph') - #if mm==0: - # textregion.set('type','heading') - #else: - # textregion.set('type','paragraph') - coord_text = ET.SubElement(textregion, 'Coords') - - points_co='' - for lmm in range(len(found_polygons_text_region[mm])): - if len(found_polygons_text_region[mm][lmm])==2: - points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][0] +page_coord[2])/self.scale_x ) ) - points_co=points_co+',' - points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][1] +page_coord[0])/self.scale_y ) ) - else: - points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][0] +page_coord[2])/self.scale_x) ) - points_co=points_co+',' - points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][1] +page_coord[0])/self.scale_y) ) - - if lmm<(len(found_polygons_text_region[mm])-1): - points_co=points_co+' ' - #print(points_co) - coord_text.set('points',points_co) + if len(contours)>0: + region_order=ET.SubElement(page, 'ReadingOrder') + region_order_sub = ET.SubElement(region_order, 'OrderedGroup') - - - for j in range(len(self.all_found_texline_polygons[mm])): - - textline=ET.SubElement(textregion, 'TextLine') - - textline.set('id','l'+str(id_indexer_l)) + region_order_sub.set('id',"ro357564684568544579089") + + args_sort=np.argsort(order_of_texts) + for vj in args_sort: + name="coord_text_"+str(vj) + name = ET.SubElement(region_order_sub, 'RegionRefIndexed') + name.set('index',str(order_of_texts[vj]) ) + name.set('regionRef',id_of_texts[vj]) + + + id_indexer=0 + id_indexer_l=0 + + for mm in range(len(found_polygons_text_region)): + textregion=ET.SubElement(page, 'TextRegion') + + textregion.set('id','r'+str(id_indexer)) + id_indexer+=1 - id_indexer_l+=1 + textregion.set('type','paragraph') + #if mm==0: + # textregion.set('type','heading') + #else: + # textregion.set('type','paragraph') + coord_text = ET.SubElement(textregion, 'Coords') - - coord = ET.SubElement(textline, 'Coords') - - texteq=ET.SubElement(textline, 'TextEquiv') - - uni=ET.SubElement(texteq, 'Unicode') - uni.text = ' ' - - #points = ET.SubElement(coord, 'Points') - points_co='' - for l in range(len(self.all_found_texline_polygons[mm][j])): - 
#point = ET.SubElement(coord, 'Point') - - - - #point.set('x',str(found_polygons[j][l][0])) - #point.set('y',str(found_polygons[j][l][1])) - if len(self.all_found_texline_polygons[mm][j][l])==2: - points_co=points_co+str( int( (self.all_found_texline_polygons[mm][j][l][0] +page_coord[2] - +self.all_box_coord[mm][2])/self.scale_x) ) + for lmm in range(len(found_polygons_text_region[mm])): + if len(found_polygons_text_region[mm][lmm])==2: + points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][0] +page_coord[2])/self.scale_x ) ) points_co=points_co+',' - points_co=points_co+str( int( (self.all_found_texline_polygons[mm][j][l][1] +page_coord[0] - +self.all_box_coord[mm][0])/self.scale_y) ) + points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][1] +page_coord[0])/self.scale_y ) ) else: - points_co=points_co+str( int( ( self.all_found_texline_polygons[mm][j][l][0][0] +page_coord[2] - +self.all_box_coord[mm][2])/self.scale_x ) ) + points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][0] +page_coord[2])/self.scale_x) ) points_co=points_co+',' - points_co=points_co+str( int( ( self.all_found_texline_polygons[mm][j][l][0][1] +page_coord[0] - +self.all_box_coord[mm][0])/self.scale_y) ) - - if l<(len(self.all_found_texline_polygons[mm][j])-1): + points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][1] +page_coord[0])/self.scale_y) ) + + if lmm<(len(found_polygons_text_region[mm])-1): points_co=points_co+' ' #print(points_co) - coord.set('points',points_co) + coord_text.set('points',points_co) - texteqreg=ET.SubElement(textregion, 'TextEquiv') - - unireg=ET.SubElement(texteqreg, 'Unicode') - unireg.text = ' ' + + + for j in range(len(self.all_found_texline_polygons[mm])): + + textline=ET.SubElement(textregion, 'TextLine') + + textline.set('id','l'+str(id_indexer_l)) + + id_indexer_l+=1 + + + coord = ET.SubElement(textline, 'Coords') + + texteq=ET.SubElement(textline, 'TextEquiv') + + uni=ET.SubElement(texteq, 'Unicode') + uni.text = ' ' + + #points = ET.SubElement(coord, 'Points') + + points_co='' + for l in range(len(self.all_found_texline_polygons[mm][j])): + #point = ET.SubElement(coord, 'Point') + + + + #point.set('x',str(found_polygons[j][l][0])) + #point.set('y',str(found_polygons[j][l][1])) + if len(self.all_found_texline_polygons[mm][j][l])==2: + points_co=points_co+str( int( (self.all_found_texline_polygons[mm][j][l][0] +page_coord[2] + +self.all_box_coord[mm][2])/self.scale_x) ) + points_co=points_co+',' + points_co=points_co+str( int( (self.all_found_texline_polygons[mm][j][l][1] +page_coord[0] + +self.all_box_coord[mm][0])/self.scale_y) ) + else: + points_co=points_co+str( int( ( self.all_found_texline_polygons[mm][j][l][0][0] +page_coord[2] + +self.all_box_coord[mm][2])/self.scale_x ) ) + points_co=points_co+',' + points_co=points_co+str( int( ( self.all_found_texline_polygons[mm][j][l][0][1] +page_coord[0] + +self.all_box_coord[mm][0])/self.scale_y) ) + + if l<(len(self.all_found_texline_polygons[mm][j])-1): + points_co=points_co+' ' + #print(points_co) + coord.set('points',points_co) + + texteqreg=ET.SubElement(textregion, 'TextEquiv') + + unireg=ET.SubElement(texteqreg, 'Unicode') + unireg.text = ' ' @@ -1596,15 +1595,21 @@ class textlineerkenner: image_page,page_coord=self.extract_page() text_regions=self.extract_text_regions(image_page) boxes,contours=self.get_text_region_contours_and_boxes(text_regions) - self.get_all_image_patches_based_on_text_regions(boxes,image_page) - textline_mask_tot=self.textline_contours(image_page) - 
-        indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours)
-        order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted )
-        self.get_textlines_for_each_textregions(textline_mask_tot,boxes)
-        self.get_slopes_for_each_text_region(contours)
-        self.deskew_textline_patches(contours, boxes)
+        if len(contours)>0:
+            self.get_all_image_patches_based_on_text_regions(boxes,image_page)
+            textline_mask_tot=self.textline_contours(image_page)
+
+            indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours)
+            order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted )
+
+            self.get_textlines_for_each_textregions(textline_mask_tot,boxes)
+            self.get_slopes_for_each_text_region(contours)
+            self.deskew_textline_patches(contours, boxes)
+        else:
+            contours=[]
+            order_of_texts=None
+            id_of_texts=None
         self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts)
 
         # Destroy the current Keras session/graph to free memory

From d8e04e3de4478d6f8dd595cd44edbac5529c32f4 Mon Sep 17 00:00:00 2001
From: "Rezanezhad, Vahid"
Date: Fri, 25 Oct 2019 14:07:36 +0200
Subject: [PATCH 23/47] Memory leakage is removed. New deskewing method is integrated.

---
 qurator/sbb_textline_detector/main.py | 733 +++++++++++++++++++-------
 1 file changed, 528 insertions(+), 205 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index 1cc88ce..86ba3c1 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -79,7 +79,7 @@ class textlineerkenner:
             if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(
                     image.shape[:2]):  # and hirarchy[0][jv][3]==-1 :
                 found_polygons_early.append(
-                    np.array([point for point in polygon.exterior.coords], dtype=np.uint))
+                    np.array([ [point] for point in polygon.exterior.coords], dtype=np.uint))
             jv += 1
         return found_polygons_early
@@ -414,15 +414,11 @@ class textlineerkenner:
         img_width_page = model_page.layers[len(model_page.layers) - 1].output_shape[2]
         n_classes_page = model_page.layers[len(model_page.layers) - 1].output_shape[3]
 
-        img_org_copy = self.image.copy()
-
         img = self.otsu_copy(self.image)
 
         for ii in range(60):
             img = cv2.GaussianBlur(img, (15, 15), 0)
 
-        # img=self.image.astype(np.uint8)
-        # img = cv2.medianBlur(img,5)
         img = img / 255.0
         img = self.resize_image(img, img_height_page, img_width_page)
@@ -432,19 +428,14 @@ class textlineerkenner:
         seg = np.argmax(label_p_pred, axis=3)[0]
         seg_color = self.color_images(seg, n_classes_page)
+        imgs = self.resize_image(seg_color, self.image.shape[0], self.image.shape[1])
 
-        imgs = seg_color  # /np.max(seg_color)*255#np.repeat(seg_color[:, :, np.newaxis], 3, axis=2)
-
-        imgs = self.resize_image(imgs, img_org_copy.shape[0], img_org_copy.shape[1])
-
-        # plt.imshow(imgs*255)
-        # plt.show()
         imgs = imgs.astype(np.uint8)
         imgray = cv2.cvtColor(imgs, cv2.COLOR_BGR2GRAY)
         _, thresh = cv2.threshold(imgray, 0, 255, 0)
 
-        thresh = cv2.dilate(thresh, self.kernel, iterations=30)
+        thresh = cv2.dilate(thresh, self.kernel, iterations=3)
         contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
 
         cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
@@ -455,11 +446,23 @@ class textlineerkenner:
 
         box = [x, y, w, h]
 
-        croped_page, page_coord = self.crop_image_inside_box(box, img_org_copy)
+        croped_page, page_coord = self.crop_image_inside_box(box, self.image)
+
+        self.cont_page=[]
+
self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , + [ page_coord[3] , page_coord[0] ] , + [ page_coord[3] , page_coord[1] ] , + [ page_coord[2] , page_coord[1] ]] ) ) session_page.close() del model_page del session_page + del self.image + del seg + del contours + del thresh + del imgs + del img gc.collect() return croped_page, page_coord @@ -477,7 +480,7 @@ class textlineerkenner: height = img_height_region # offset=int(.1*width) - offset = int(0.03 * width) + offset = int(0.1 * width) width_mid = width - 2 * offset height_mid = height - 2 * offset @@ -534,6 +537,8 @@ class textlineerkenner: if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - height + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] @@ -544,12 +549,77 @@ class textlineerkenner: seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - offset] + if i==0 and j==0: + seg_color = seg_color[0:seg_color.shape[0] - offset, 0:seg_color.shape[1] - offset, :] + seg = seg[0:seg.shape[0] - offset, 0:seg.shape[1] - offset] + + mask_true[index_y_d + 0:index_y_u - offset, index_x_d + 0:index_x_u - offset] = seg + prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + 0:index_x_u - offset, + :] = seg_color + + elif i==nxf-1 and j==nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - 0, offset:seg_color.shape[1] - 0, :] + seg = seg[offset:seg.shape[0] - 0, offset:seg.shape[1] - 0] + + mask_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - 0] = seg + prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - 0, + :] = seg_color + + elif i==0 and j==nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - 0, 0:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - 0, 0:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - 0, index_x_d + 0:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + 0:index_x_u - offset, + :] = seg_color + + elif i==nxf-1 and j==0: + seg_color = seg_color[0:seg_color.shape[0] - offset, offset:seg_color.shape[1] - 0, :] + seg = seg[0:seg.shape[0] - offset, offset:seg.shape[1] - 0] + + mask_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - 0] = seg + prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - 0, + :] = seg_color + + elif i==0 and j!=0 and j!=nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - offset, 0:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - offset, 0:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - offset, index_x_d + 0:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + 0:index_x_u - offset, + :] = seg_color + + elif i==nxf-1 and j!=0 and j!=nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - 0, :] + seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - 0] + + mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - 0] = seg + prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - 0, + :] = seg_color + + elif i!=0 and i!=nxf-1 and j==0: + seg_color = seg_color[0:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] + seg = seg[0:seg.shape[0] - offset, offset:seg.shape[1] - offset] + + mask_true[index_y_d + 0:index_y_u - offset, 
index_x_d + offset:index_x_u - offset] = seg + prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - offset, + :] = seg_color + + elif i!=0 and i!=nxf-1 and j==nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - 0, offset:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - 0, offset:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - offset, + :] = seg_color + + else: + seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - offset] - mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset, - :] = seg_color + mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset, + :] = seg_color prediction_true = prediction_true.astype(np.uint8) session_region.close() @@ -568,7 +638,7 @@ class textlineerkenner: image = cv2.morphologyEx(image, cv2.MORPH_OPEN, self.kernel) image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, self.kernel) - # image = cv2.erode(image,self.kernel,iterations = 3) + #image = cv2.erode(image,self.kernel,iterations = 2) # image = cv2.dilate(image,self.kernel,iterations = 3) @@ -579,7 +649,7 @@ class textlineerkenner: contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) # commenst_contours=self.filter_contours_area_of_image(thresh,contours,hirarchy,max_area=0.0002,min_area=0.0001) - main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.0001) + main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.00001) img_comm = np.zeros(thresh.shape) img_comm_in = cv2.fillPoly(img_comm, pts=main_contours, color=(255, 255, 255)) @@ -606,12 +676,11 @@ class textlineerkenner: return boxes, contours_new def get_all_image_patches_based_on_text_regions(self, boxes, image_page): - self.all_text_images = [] - self.all_box_coord = [] + self.all_box_coord=[] for jk in range(len(boxes)): - crop_img, crop_coor = self.crop_image_inside_box(boxes[jk], image_page) - self.all_text_images.append(crop_img) - self.all_box_coord.append(crop_coor) + crop_img,crop_coor=self.crop_image_inside_box(boxes[jk],image_page) + self.all_box_coord.append(crop_coor) + del crop_img def textline_contours(self, img): model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) @@ -627,7 +696,7 @@ class textlineerkenner: if img.shape[1] < img_width_textline: img = cv2.resize(img, (img_height_textline, img.shape[0]), interpolation=cv2.INTER_NEAREST) - margin = False + margin = True if not margin: width = img_width_textline @@ -684,6 +753,150 @@ class textlineerkenner: y_predi = mask_true y_predi = cv2.resize(y_predi, (img_org.shape[1], img_org.shape[0]), interpolation=cv2.INTER_NEAREST) + + + if margin: + + width = img_width_textline + height = img_height_textline + + # offset=int(.1*width) + offset = int(0.1 * width) + + width_mid = width - 2 * offset + height_mid = height - 2 * offset + + img = self.otsu_copy(img) + img = img.astype(np.uint8) + + img = img / 255.0 + + img_h = img.shape[0] + img_w = 
img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + if nyf > int(nyf): + nyf = int(nyf) + 1 + else: + nyf = int(nyf) + + for i in range(nxf): + for j in range(nyf): + + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + width # (i+1)*width + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + width # (i+1)*width + + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + height # (j+1)*height + elif j > 0: + index_y_d = j * height_mid + index_y_u = index_y_d + height # (j+1)*height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - height + + + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + label_p_pred = model_textline.predict( + img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + + if i==0 and j==0: + seg_color = seg_color[0:seg_color.shape[0] - offset, 0:seg_color.shape[1] - offset, :] + seg = seg[0:seg.shape[0] - offset, 0:seg.shape[1] - offset] + + mask_true[index_y_d + 0:index_y_u - offset, index_x_d + 0:index_x_u - offset] = seg + prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + 0:index_x_u - offset, + :] = seg_color + + elif i==nxf-1 and j==nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - 0, offset:seg_color.shape[1] - 0, :] + seg = seg[offset:seg.shape[0] - 0, offset:seg.shape[1] - 0] + + mask_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - 0] = seg + prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - 0, + :] = seg_color + + elif i==0 and j==nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - 0, 0:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - 0, 0:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - 0, index_x_d + 0:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + 0:index_x_u - offset, + :] = seg_color + + elif i==nxf-1 and j==0: + seg_color = seg_color[0:seg_color.shape[0] - offset, offset:seg_color.shape[1] - 0, :] + seg = seg[0:seg.shape[0] - offset, offset:seg.shape[1] - 0] + + mask_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - 0] = seg + prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - 0, + :] = seg_color + + elif i==0 and j!=0 and j!=nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - offset, 0:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - offset, 0:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - offset, index_x_d + 0:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + 0:index_x_u - offset, + :] = seg_color + + elif i==nxf-1 and j!=0 and j!=nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - 0, :] + seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - 0] + + mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - 0] = seg + prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - 0, + :] = seg_color + + elif i!=0 and i!=nxf-1 and j==0: + seg_color = seg_color[0:seg_color.shape[0] - offset, 
offset:seg_color.shape[1] - offset, :] + seg = seg[0:seg.shape[0] - offset, offset:seg.shape[1] - offset] + + mask_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg + prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - offset, + :] = seg_color + + elif i!=0 and i!=nxf-1 and j==nyf-1: + seg_color = seg_color[offset:seg_color.shape[0] - 0, offset:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - 0, offset:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - offset, + :] = seg_color + + else: + seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset, + :] = seg_color + + y_predi = mask_true.astype(np.uint8) session_textline.close() del model_textline @@ -698,6 +911,7 @@ class textlineerkenner: for jk in range(len(boxes)): crop_img, crop_coor = self.crop_image_inside_box(boxes[jk], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) + crop_img=crop_img.astype(np.uint8) self.all_text_region_raw.append(crop_img[:, :, 0]) self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1]) @@ -802,6 +1016,15 @@ class textlineerkenner: x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + if x_min_rot1<0: + x_min_rot1=0 + if x_min_rot4<0: + x_min_rot4=0 + if point_up_rot1<0: + point_up_rot1=0 + if point_up_rot2<0: + point_up_rot2=0 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -832,6 +1055,16 @@ class textlineerkenner: x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + + if x_min_rot1<0: + x_min_rot1=0 + if x_min_rot4<0: + x_min_rot4=0 + if point_up_rot1<0: + point_up_rot1=0 + if point_up_rot2<0: + point_up_rot2=0 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -881,6 +1114,15 @@ class textlineerkenner: x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + if x_min_rot1<0: + x_min_rot1=0 + if x_min_rot4<0: + x_min_rot4=0 + if point_up_rot1<0: + point_up_rot1=0 + if point_up_rot2<0: + point_up_rot2=0 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -939,6 +1181,16 @@ class textlineerkenner: x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + + if x_min_rot1<0: + x_min_rot1=0 + if x_min_rot4<0: + x_min_rot4=0 + if point_up_rot1<0: + point_up_rot1=0 + if point_up_rot2<0: + point_up_rot2=0 textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -976,12 +1228,11 @@ class textlineerkenner: commenst_contours = self.filter_contours_area_of_image(thresh, contours, 
hirarchy, max_area=0.01, min_area=0.003) - main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003) - # interior_contours=self.filter_contours_area_of_image_interiors(thresh,contours,hirarchy,max_area=1,min_area=0) + main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.0003) + img_comm = np.zeros(thresh.shape) img_comm_in = cv2.fillPoly(img_comm, pts=main_contours, color=(255, 255, 255)) - ###img_comm_in=cv2.fillPoly(img_comm, pts =interior_contours, color=(0,0,0)) img_comm_in = np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2) img_comm_in = img_comm_in.astype(np.uint8) @@ -1018,7 +1269,7 @@ class textlineerkenner: contour_text_copy = contour_text_interest.copy() contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[ - 0] # np.min(contour_text_interest_copy[:,0,0]) + 0] contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1] img_contour = np.zeros((box_ind[3], box_ind[2], 3)) @@ -1026,7 +1277,6 @@ class textlineerkenner: img_contour_rot = self.rotate_image(img_contour, slope) - # img_comm_in=np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2) img_contour_rot = img_contour_rot.astype(np.uint8) imgrayrot = cv2.cvtColor(img_contour_rot, cv2.COLOR_BGR2GRAY) _, threshrot = cv2.threshold(imgrayrot, 0, 255, 0) @@ -1081,9 +1331,7 @@ class textlineerkenner: contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - # commenst_contours=self.filter_contours_area_of_image(thresh,contours,hirarchy,max_area=0.01,min_area=0.003) main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003) - # interior_contours=self.filter_contours_area_of_image_interiors(thresh,contours,hirarchy,max_area=1,min_area=0) textline_maskt = textline_mask[:, :, 0] textline_maskt[textline_maskt != 0] = 1 @@ -1091,10 +1339,8 @@ class textlineerkenner: _, peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new) mean_dis = np.mean(np.diff(peaks_point)) - # mean_dis=np.median(np.diff(peaks_point)) len_x = thresh.shape[1] - # print(len_x,mean_dis,'x') slope_lines = [] contours_slope_new = [] @@ -1106,7 +1352,6 @@ class textlineerkenner: yminh = np.min(main_contours[kk][:, 1]) ymaxh = np.max(main_contours[kk][:, 1]) - # print(xminh,xmaxh ,yminh,ymaxh,ymaxh-yminh) if ymaxh - yminh <= mean_dis and ( xmaxh - xminh) >= 0.3 * len_x: # xminh>=0.05*len_x and xminh<=0.4*len_x and xmaxh<=0.95*len_x and xmaxh>=0.6*len_x: @@ -1127,40 +1372,205 @@ class textlineerkenner: slope = 0 return slope + def return_contours_of_image(self,image_box_tabels_1): + + image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) + image_box_tabels=image_box_tabels.astype(np.uint8) + imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) + contours,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + return contours + + def find_contours_mean_y_diff(self,contours_main): + M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] + cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] + return np.mean( np.diff( np.sort( np.array(cy_main) ) ) ) + + + def isNaN(self,num): + return num != num + + def find_num_col(self,regions_without_seperators,sigma_,multiplier=3.8 ): + regions_without_seperators_0=regions_without_seperators[:,:].sum(axis=1) - def get_slopes_for_each_text_region(self, contours): + 
meda_n_updown=regions_without_seperators_0[len(regions_without_seperators_0)::-1]
+
+        first_nonzero=(next((i for i, x in enumerate(regions_without_seperators_0) if x), 0))
+        last_nonzero=(next((i for i, x in enumerate(meda_n_updown) if x), 0))
+
+        last_nonzero=len(regions_without_seperators_0)-last_nonzero
+
+
+        y=regions_without_seperators_0#[first_nonzero:last_nonzero]
+
+        y_help=np.zeros(len(y)+20)
+
+        y_help[10:len(y)+10]=y
+
+        x=np.array( range(len(y)) )
+
+
+
+
+        zneg_rev=-y_help+np.max(y_help)
+
+        zneg=np.zeros(len(zneg_rev)+20)
+
+        zneg[10:len(zneg_rev)+10]=zneg_rev
+
+        z=gaussian_filter1d(y, sigma_)
+        zneg= gaussian_filter1d(zneg, sigma_)
+
+
+        peaks_neg, _ = find_peaks(zneg, height=0)
+        peaks, _ = find_peaks(z, height=0)
+
+        peaks_neg=peaks_neg-10-10
+
+
-        # first let find the slop for biggest patch of text region
+        last_nonzero=last_nonzero-0#100
+        first_nonzero=first_nonzero+0#+100
+        peaks_neg=peaks_neg[(peaks_neg>first_nonzero) & (peaks_neg<last_nonzero)]
+
+        peaks=peaks[(peaks>.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])]
+
+        interest_pos=z[peaks]
+
+        interest_pos=interest_pos[interest_pos>10]
+
+        interest_neg=z[peaks_neg]
+
+
+        if interest_neg[0]<0.1:
+            interest_neg=interest_neg[1:]
+        if interest_neg[len(interest_neg)-1]<0.1:
+            interest_neg=interest_neg[:len(interest_neg)-1]
+
+
+
+        min_peaks_pos=np.min(interest_pos)
+        min_peaks_neg=0#np.min(interest_neg)
+
+
+        dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier
+        grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0
+
+        interest_neg_fin=interest_neg#[(interest_neg<grenze)]
[…]
+        img_int_p[img_int_p>0]=1
+
+
+        slope_biggest=self.return_deskew_slop(img_int_p,sigma_des)
+
+        # This was the old method. The new one seems to work better, but more tests are required.
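+        # The idea behind return_deskew_slop, as an illustrative sketch (not
+        # the exact implementation): rotate the binarized patch over a range
+        # of candidate angles and keep the angle whose smoothed horizontal
+        # projection profile has the largest variance, i.e. the sharpest
+        # alternation between text lines and inter-line gaps:
+        #
+        #   candidates = np.linspace(-12.5, 12.5, 51)
+        #   img_3ch = np.repeat(img_int_p[:, :, np.newaxis], 3, axis=2)
+        #   scores = [np.var(gaussian_filter1d(
+        #       self.rotate_image(img_3ch, ang)[:, :, 0].sum(axis=1), sigma_des))
+        #       for ang in candidates]
+        #   slope_candidate = candidates[int(np.argmax(scores))]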
+ #slope_biggest = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[index_max_area], + # denoised, contours[index_max_area]) + - if np.abs(slope_biggest) > 2.5: + if np.abs(slope_biggest) > 20: slope_biggest = 0 self.slopes = [] - for mv in range(len(self.all_text_images)): - denoised = cv2.blur(self.all_text_images[mv], (5, 5)) # otsu_copy(crop_img)# - denoised = cv2.medianBlur(denoised, 5) # cv2.GaussianBlur(crop_img, (5, 5), 0) - denoised = cv2.GaussianBlur(denoised, (5, 5), 0) - denoised = self.otsu_copy(denoised) - denoised = denoised.astype(np.uint8) - slope_for_all = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv], denoised, - contours[mv]) - # text_patch_processed=textline_contours_postprocessing(gada) - - if np.abs(slope_for_all) > 2.5 and slope_for_all != 999: - slope_for_all = 0 - elif slope_for_all == 999: - slope_for_all = slope_biggest + for mv in range(len(self.all_text_region_raw)): + img_int_p=self.all_text_region_raw[mv] + + try: + textline_con=self.return_contours_of_image(img_int_p) + textline_con_fil=self.filter_contours_area_of_image(img_int_p,textline_con,denoised,max_area=1,min_area=0.0008) + y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) + + sigma_des=int( y_diff_mean * (4./40.0) ) + + if sigma_des<1: + sigma_des=1 + + img_int_p[img_int_p>0]=1 + slope_for_all=self.return_deskew_slop(img_int_p,sigma_des) + + #old method + #slope_for_all=self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv],denoised,contours[mv]) + #text_patch_processed=textline_contours_postprocessing(gada) + + except: + slope_for_all=999 + + + if np.abs(slope_for_all)>12.5 and slope_for_all!=999: + slope_for_all=slope_biggest + elif slope_for_all==999: + slope_for_all=slope_biggest self.slopes.append(slope_for_all) + def order_of_regions(self, textline_mask,contours_main): mada_n=textline_mask.sum(axis=1) @@ -1202,7 +1612,6 @@ class textlineerkenner: y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) - #print(contours_main[0],np.shape(contours_main[0]),contours_main[0][:,0,0]) @@ -1232,9 +1641,7 @@ class textlineerkenner: matrix_of_orders[:len_main,4]=np.array( range( len_main ) ) - #matrix_of_orders[len_main:,4]=np.array( range( len_head ) ) - - #print(matrix_of_orders) + peaks_neg_new=[] @@ -1283,22 +1690,14 @@ class textlineerkenner: def deskew_textline_patches(self, contours, boxes): self.all_text_region_processed = [] self.all_found_texline_polygons = [] + + denoised=None + + for jj in range(len(self.all_text_region_raw)): - for jj in range(len(self.all_text_images)): - # print(all_text_images[jj][0,0,0],np.unique(all_text_images[jj][:,:,0])) - ###gada=self.all_text_images[jj][:,:,0] - ###gada=(gada[:,:]==0)*1 - # print(gada[0,0]) - - denoised = cv2.blur(self.all_text_images[jj], (5, 5)) # otsu_copy(crop_img)# - denoised = cv2.medianBlur(denoised, 5) # cv2.GaussianBlur(crop_img, (5, 5), 0) - denoised = cv2.GaussianBlur(denoised, (5, 5), 0) - denoised = self.otsu_copy(denoised) - denoised = denoised.astype(np.uint8) text_patch_processed, cnt_clean_rot = self.textline_contours_postprocessing(self.all_text_region_raw[jj] , denoised, self.slopes[jj], contours[jj], boxes[jj]) - # text_patch_processed=textline_contours_postprocessing(gada) self.all_text_region_processed.append(text_patch_processed) text_patch_processed = text_patch_processed.astype(np.uint8) @@ -1307,12 +1706,8 @@ class 
textlineerkenner: _, thresh = cv2.threshold(imgray, 0, 255, 0) self.found_polygons, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - ####all_found_texline_polygons.append(found_polygons)cnt_clean_rot self.all_found_texline_polygons.append(cnt_clean_rot) - # img_v=np.zeros(text_patch_processed.shape) - # img_v=cv2.fillPoly(img_v, pts =found_polygons, color=(255,255,255)) - # sumi=np.sum(np.sum(self.all_text_images[jj],axis=2),axis=1) def write_into_page_xml(self,contours,page_coord,dir_of_image,order_of_texts , id_of_texts): @@ -1325,14 +1720,13 @@ class textlineerkenner: data.set('xmlns',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15") data.set('xmlns:xsi',"http://www.w3.org/2001/XMLSchema-instance") data.set('xsi:schemaLocation',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15") - #data.set('http',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15/pagecontent.xsd") metadata=ET.SubElement(data,'Metadata') author=ET.SubElement(metadata, 'Creator') - author.text = 'Vahid' + author.text = 'SBB_QURATOR' created=ET.SubElement(metadata, 'Created') @@ -1353,26 +1747,25 @@ class textlineerkenner: page.set('textLineOrder',"top-to-bottom" ) - """ + page_print_sub=ET.SubElement(page, 'PrintSpace') coord_page = ET.SubElement(page_print_sub, 'Coords') points_page_print='' - for lmm in range(len(cont_page[0])): - if len(cont_page[0][lmm])==2: - points_page_print=points_page_print+str( int( (cont_page[0][lmm][0])/self.scale_x ) ) + for lmm in range(len(self.cont_page[0])): + if len(self.cont_page[0][lmm])==2: + points_page_print=points_page_print+str( int( (self.cont_page[0][lmm][0])/self.scale_x ) ) points_page_print=points_page_print+',' - points_page_print=points_page_print+str( int( (cont_page[0][lmm][1])/self.scale_y ) ) + points_page_print=points_page_print+str( int( (self.cont_page[0][lmm][1])/self.scale_y ) ) else: - points_page_print=points_page_print+str( int((cont_page[0][lmm][0][0])/self.scale_x) ) + points_page_print=points_page_print+str( int((self.cont_page[0][lmm][0][0])/self.scale_x) ) points_page_print=points_page_print+',' - points_page_print=points_page_print+str( int((cont_page[0][lmm][0][1])/self.scale_y) ) + points_page_print=points_page_print+str( int((self.cont_page[0][lmm][0][1])/self.scale_y) ) - if lmm<(len(cont_page[0])-1): + if lmm<(len(self.cont_page[0])-1): points_page_print=points_page_print+' ' - #print(points_co) coord_page.set('points',points_page_print) - """ + if len(contours)>0: region_order=ET.SubElement(page, 'ReadingOrder') @@ -1477,135 +1870,65 @@ class textlineerkenner: tree = ET.ElementTree(data) tree.write(os.path.join(self.dir_out, self.f_name) + ".xml") - """ - def write_into_page_xml(self, contours, page_coord): - - found_polygons_text_region = contours - data = ET.Element('PcGts') - - data.set('xmlns', "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15") - data.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance") - data.set('xsi:schemaLocation', "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15") - # data.set('http',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15/pagecontent.xsd") - - metadata = ET.SubElement(data, 'Metadata') - - author = ET.SubElement(metadata, 'Creator') - author.text = 'Vahid' - - created = ET.SubElement(metadata, 'Created') - created.text = '2019-06-17T18:15:12' - - changetime = ET.SubElement(metadata, 'LastChange') - changetime.text = '2019-06-17T18:15:12' - - page = ET.SubElement(data, 'Page') - - 
page.set('imageFilename', self.image_dir) - page.set('imageHeight', str(self.height_org)) - page.set('imageWidth', str(self.width_org)) - - id_indexer = 0 - - for mm in range(len(found_polygons_text_region)): - textregion = ET.SubElement(page, 'TextRegion') - - textregion.set('id', 'r' + str(id_indexer)) - id_indexer += 1 - - if mm == 0: - textregion.set('type', 'heading') - else: - textregion.set('type', 'paragraph') - coord_text = ET.SubElement(textregion, 'Coords') - - points_co = '' - for lmm in range(len(found_polygons_text_region[mm])): - - if len(found_polygons_text_region[mm][lmm]) == 2: - points_co = points_co + str( - int((found_polygons_text_region[mm][lmm][0] + page_coord[2]) / self.scale_x)) - points_co = points_co + ',' - points_co = points_co + str( - int((found_polygons_text_region[mm][lmm][1] + page_coord[0]) / self.scale_y)) - else: - points_co = points_co + str( - int((found_polygons_text_region[mm][lmm][0][0] + page_coord[2]) / self.scale_x)) - points_co = points_co + ',' - points_co = points_co + str( - int((found_polygons_text_region[mm][lmm][0][1] + page_coord[0]) / self.scale_y)) - - if lmm < (len(found_polygons_text_region[mm]) - 1): - points_co = points_co + ' ' - # print(points_co) - coord_text.set('points', points_co) - - for j in range(len(self.all_found_texline_polygons[mm])): - - textline = ET.SubElement(textregion, 'TextLine') - - textline.set('id', 'l' + str(id_indexer)) - - id_indexer += 1 - - coord = ET.SubElement(textline, 'Coords') - - texteq = ET.SubElement(textline, 'TextEquiv') - - uni = ET.SubElement(texteq, 'Unicode') - uni.text = ' ' - - # points = ET.SubElement(coord, 'Points') - - points_co = '' - for l in range(len(self.all_found_texline_polygons[mm][j])): - # point = ET.SubElement(coord, 'Point') - - # point.set('x',str(found_polygons[j][l][0])) - # point.set('y',str(found_polygons[j][l][1])) - if len(self.all_found_texline_polygons[mm][j][l]) == 2: - points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0] + page_coord[2] - + self.all_box_coord[mm][2]) / self.scale_x)) - points_co = points_co + ',' - points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][1] + page_coord[0] - + self.all_box_coord[mm][0]) / self.scale_y)) - else: - points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0][0] + page_coord[2] - + self.all_box_coord[mm][2]) / self.scale_x)) - points_co = points_co + ',' - points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0][1] + page_coord[0] - + self.all_box_coord[mm][0]) / self.scale_y)) - - if l < (len(self.all_found_texline_polygons[mm][j]) - 1): - points_co = points_co + ' ' - # print(points_co) - coord.set('points', points_co) - - texteqreg = ET.SubElement(textregion, 'TextEquiv') - - unireg = ET.SubElement(texteqreg, 'Unicode') - unireg.text = ' ' - - tree = ET.ElementTree(data) - tree.write(os.path.join(self.dir_out, self.f_name) + ".xml") - """ + def run(self): + + #get image and sclaes, then extract the page of scanned image self.get_image_and_scales() image_page,page_coord=self.extract_page() + + ########## + K.clear_session() + gc.collect() + + # extract text regions and corresponding contours and surrounding box text_regions=self.extract_text_regions(image_page) boxes,contours=self.get_text_region_contours_and_boxes(text_regions) + ########## + K.clear_session() + gc.collect() + if len(contours)>0: + self.get_all_image_patches_based_on_text_regions(boxes,image_page) + + ########## + gc.collect() + + # extracting textlines using 
segmentation textline_mask_tot=self.textline_contours(image_page) + ########## + K.clear_session() + gc.collect() + + # get orders of each textregion. This method currently only works for one-column documents. indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours) order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted ) + ########## + gc.collect() + + + # just get the textline result for each box of text regions self.get_textlines_for_each_textregions(textline_mask_tot,boxes) + + ########## + gc.collect() + + # calculate the slope for deskewing for each box of text region. self.get_slopes_for_each_text_region(contours) + + ########## + gc.collect() + + # do deskewing for each box of text region. self.deskew_textline_patches(contours, boxes) + + ########## + gc.collect() else: contours=[] order_of_texts=None From 2528573b4f8d8c6b21cf94a31295557a41a7db62 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 25 Oct 2019 14:16:09 +0200 Subject: [PATCH 24/47] =?UTF-8?q?=E2=9C=A8=20sbb=5Ftextline=5Fdetector:=20?= =?UTF-8?q?Allow=20PAGE=20input=20in=20OCR-D=20interface?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous OCR-D processors may output PAGE files instead of image files. Resolve image files from PAGE files if necessary. --- qurator/sbb_textline_detector/ocrd_cli.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py index 029c185..2a98104 100644 --- a/qurator/sbb_textline_detector/ocrd_cli.py +++ b/qurator/sbb_textline_detector/ocrd_cli.py @@ -4,7 +4,9 @@ import os import click from ocrd import Processor from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor -from ocrd_utils import concat_padded, getLogger +from ocrd_modelfactory import page_from_file +from ocrd_models import OcrdFile +from ocrd_utils import concat_padded, getLogger, MIMETYPE_PAGE from pkg_resources import resource_string from qurator.sbb_textline_detector import textlineerkenner @@ -32,12 +34,22 @@ class OcrdSbbTextlineDetectorRecognize(Processor): file_id = concat_padded(self.output_file_grp, n) return file_id + def _resolve_image_file(self, input_file: OcrdFile) -> str: + if input_file.mimetype == MIMETYPE_PAGE: + pcgts = page_from_file(self.workspace.download_file(input_file)) + page = pcgts.get_Page() + image_file = page.imageFilename + else: + image_file = input_file.local_filename + return image_file + def process(self): for n, page_id in enumerate(self.workspace.mets.physical_pages): - image_file = self.workspace.mets.find_files(fileGrp=self.input_file_grp, pageId=page_id)[0] - log.info("INPUT FILE %i / %s", n, image_file) + input_file = self.workspace.mets.find_files(fileGrp=self.input_file_grp, pageId=page_id)[0] + log.info("INPUT FILE %i / %s", n, input_file) - file_id = self._make_file_id(image_file, self.output_file_grp, n) + file_id = self._make_file_id(input_file, self.output_file_grp, n) + image_file = self._resolve_image_file(input_file) # Process the files try: @@ -46,7 +58,7 @@ class OcrdSbbTextlineDetectorRecognize(Processor): pass model = self.parameter['model'] - x = textlineerkenner(image_file.local_filename, self.output_file_grp, file_id, model) + x = textlineerkenner(image_file, self.output_file_grp, file_id, model) x.run() self.workspace.add_file( From 00929ab3916a896ab93fed3c57c27fd1c7b8b4ac Mon Sep 17 00:00:00 2001 From: "Rezanezhad,
Vahid" Date: Fri, 25 Oct 2019 14:39:37 +0200 Subject: [PATCH 25/47] Update main.py --- qurator/sbb_textline_detector/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 86ba3c1..a28bfdc 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -696,16 +696,16 @@ class textlineerkenner: if img.shape[1] < img_width_textline: img = cv2.resize(img, (img_height_textline, img.shape[0]), interpolation=cv2.INTER_NEAREST) - margin = True + margin = False if not margin: width = img_width_textline height = img_height_textline - img = self.otsu_copy(img) + #img = self.otsu_copy(img) img = img.astype(np.uint8) # for _ in range(4): - # img = cv2.medianBlur(img,5) + img = cv2.medianBlur(img,5) img = img / 255.0 img_h = img.shape[0] From 4f28cd905acf46b264a96960015131126a25d7d7 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Fri, 25 Oct 2019 18:08:31 +0200 Subject: [PATCH 26/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Do=20not=20create=20empty/space-only=20TextEquivs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocrd_tesserocr or ocrd_cis complain about already existing text if empty/space-only TextEquivs elements exist after segmentation. Also, it does not make sense to create them in a segmentation step. Fix by removing the code generating the elements. --- qurator/sbb_textline_detector/main.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 86ba3c1..5aca833 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1826,10 +1826,6 @@ class textlineerkenner: coord = ET.SubElement(textline, 'Coords') - texteq=ET.SubElement(textline, 'TextEquiv') - - uni=ET.SubElement(texteq, 'Unicode') - uni.text = ' ' #points = ET.SubElement(coord, 'Points') @@ -1859,10 +1855,6 @@ class textlineerkenner: #print(points_co) coord.set('points',points_co) - texteqreg=ET.SubElement(textregion, 'TextEquiv') - - unireg=ET.SubElement(texteqreg, 'Unicode') - unireg.text = ' ' From 719824f19d5bb9ca2034960aed6c8314782f4454 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Wed, 30 Oct 2019 13:37:54 +0100 Subject: [PATCH 27/47] Update main.py --- qurator/sbb_textline_detector/main.py | 216 +++++++++++++++----------- 1 file changed, 129 insertions(+), 87 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 86fba4e..22c123a 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -416,7 +416,7 @@ class textlineerkenner: img = self.otsu_copy(self.image) - for ii in range(60): + for ii in range(1): img = cv2.GaussianBlur(img, (15, 15), 0) @@ -487,7 +487,7 @@ class textlineerkenner: img = self.otsu_copy(img) img = img.astype(np.uint8) - ###img = cv2.medianBlur(img,5) + img = cv2.medianBlur(img,5) # img = cv2.medianBlur(img,5) @@ -665,22 +665,22 @@ class textlineerkenner: contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - boxes = [] + self.boxes = [] contours_new = [] for jj in range(len(contours)): if hirarchy[0][jj][2] == -1: x, y, w, h = cv2.boundingRect(contours[jj]) - boxes.append([x, y, w, h]) + self.boxes.append([x, y, w, h]) contours_new.append(contours[jj]) - return boxes, contours_new + return contours_new - def 
get_all_image_patches_based_on_text_regions(self, boxes, image_page): + def get_all_image_patches_coordination(self, image_page): self.all_box_coord=[] - for jk in range(len(boxes)): - crop_img,crop_coor=self.crop_image_inside_box(boxes[jk],image_page) + for jk in range(len(self.boxes)): + _,crop_coor=self.crop_image_inside_box(self.boxes[jk],image_page) self.all_box_coord.append(crop_coor) - del crop_img + def textline_contours(self, img): model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) @@ -702,7 +702,7 @@ class textlineerkenner: width = img_width_textline height = img_height_textline - #img = self.otsu_copy(img) + img = self.otsu_copy(img) img = img.astype(np.uint8) # for _ in range(4): img = cv2.medianBlur(img,5) @@ -915,8 +915,8 @@ class textlineerkenner: self.all_text_region_raw.append(crop_img[:, :, 0]) self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1]) - def seperate_lines(self, mada, contour_text_interest, thetha): - (h, w) = mada.shape[:2] + def seperate_lines(self, img_path, contour_text_interest, thetha): + (h, w) = img_path.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) x_d = M[0, 2] @@ -932,13 +932,13 @@ class textlineerkenner: y_cont = y_cont - np.min(y_cont) x_min_cont = 0 - x_max_cont = mada.shape[1] + x_max_cont = img_path.shape[1] y_min_cont = 0 - y_max_cont = mada.shape[0] + y_max_cont = img_path.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - mada_n = mada.sum(axis=1) + mada_n = img_path.sum(axis=1) first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) @@ -988,11 +988,11 @@ class textlineerkenner: point_down_narrow = peaks[jj] + first_nonzero + int( 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) - if point_down >= mada.shape[0]: - point_down = mada.shape[0] - 2 + if point_down >= img_path.shape[0]: + point_down = img_path.shape[0] - 2 - if point_down_narrow >= mada.shape[0]: - point_down_narrow = mada.shape[0] - 2 + if point_down_narrow >= img_path.shape[0]: + point_down_narrow = img_path.shape[0] - 2 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) for mj in range(len(xv))] @@ -1088,8 +1088,8 @@ class textlineerkenner: point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) elif jj == 1: point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) - if point_down >= mada.shape[0]: - point_down = mada.shape[0] - 2 + if point_down >= img_path.shape[0]: + point_down = img_path.shape[0] - 2 point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) @@ -1148,8 +1148,8 @@ class textlineerkenner: dis_to_next = peaks[jj] - peaks[jj - 1] # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) - if point_down >= mada.shape[0]: - point_down = mada.shape[0] - 2 + if point_down >= img_path.shape[0]: + point_down = img_path.shape[0] - 2 # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) point_up = peaks[jj] + first_nonzero - int(1. 
/ 1.9 * dis_to_next) else: @@ -1191,6 +1191,8 @@ class textlineerkenner: point_up_rot1=0 if point_up_rot2<0: point_up_rot2=0 + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], [int(x_max_rot2), int(point_up_rot2)], @@ -1202,13 +1204,27 @@ class textlineerkenner: [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - mada_new = np.zeros((mada.shape[0], mada.shape[1], 3)) + mada_new = np.zeros((img_path.shape[0], img_path.shape[1], 3)) mada_new = cv2.fillPoly(mada_new, pts=textline_boxes, color=(255, 255, 255)) mada_new = mada_new.astype(np.uint8) return mada_new, peaks, textline_boxes_rot + + def ruturn_rotated_contours(self,slope,img_patch): + dst = self.rotate_image(img_patch, slope) + dst = dst.astype(np.uint8) + dst = dst[:, :, 0] + dst[dst != 0] = 1 + + imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) - def textline_contours_postprocessing(self, textline_mask, img_patch, slope, contour_text_interest, box_ind): + thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel) + thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel) + contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + return contours + + def textline_contours_postprocessing(self, textline_mask, slope, contour_text_interest, box_ind): textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 @@ -1243,24 +1259,7 @@ class textlineerkenner: contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - contours_slope = contours # self.find_polygons_size_filter(contours,median_area=median_area,scaler_up=100,scaler_down=0.5) - - if len(contours_slope) > 0: - for jv in range(len(contours_slope)): - new_poly = list(contours_slope[jv]) - if jv == 0: - merged_all = new_poly - else: - merged_all = merged_all + new_poly - - merge = np.array(merged_all) - - img_in = np.zeros(textline_mask.shape) - img_p_in = cv2.fillPoly(img_in, pts=[merge], color=(255, 255, 255)) - - rect = cv2.minAreaRect(merge) - box = cv2.boxPoints(rect) - box = np.int0(box) + if len(contours) > 0: dst = self.rotate_image(textline_mask, slope) dst = dst[:, :, 0] @@ -1275,6 +1274,8 @@ class textlineerkenner: img_contour = np.zeros((box_ind[3], box_ind[2], 3)) img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255)) + + img_contour_rot = self.rotate_image(img_contour, slope) img_contour_rot = img_contour_rot.astype(np.uint8) @@ -1285,11 +1286,11 @@ class textlineerkenner: len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))] ind_big_con = np.argmax(len_con_text_rot) - textline_maskt = textline_mask[:, :, 0] - textline_maskt[textline_maskt != 0] = 1 + sep_img, _, contours_rotated_clean = self.seperate_lines(dst, contours_text_rot[ind_big_con], slope) + dst = self.rotate_image(sep_img, -slope) imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY) @@ -1510,43 +1511,46 @@ class textlineerkenner: return ang_int - def get_slopes_for_each_text_region(self, contours): + def get_slopes_for_each_text_region(self, contours,textline_mask_tot): # first lets find slope for biggest patch of text region (slope of deskewing) - denoised=None - index_max_area = np.argmax(self.area_of_cropped) - img_int_p=self.all_text_region_raw[index_max_area] - textline_con=self.return_contours_of_image(img_int_p) - textline_con_fil=self.filter_contours_area_of_image(img_int_p,textline_con,denoised,max_area=1,min_area=0.0008) - 
y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) + + #index_max_area = np.argmax(self.area_of_cropped) + #img_int_p=self.all_text_region_raw[index_max_area] + #textline_con=self.return_contours_of_image(img_int_p) + #textline_con_fil=self.filter_contours_area_of_image(img_int_p,textline_con,denoised,max_area=1,min_area=0.0008) + #y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) - sigma_des=int( y_diff_mean * (4./40.0) ) + #sigma_des=int( y_diff_mean * (4./40.0) ) #refrence : sigma =4 for diff=40 - if sigma_des<1: - sigma_des=1 + #if sigma_des<1: + # sigma_des=1 - img_int_p[img_int_p>0]=1 + #img_int_p[img_int_p>0]=1 - slope_biggest=self.return_deskew_slop(img_int_p,sigma_des) + slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des) # this was the old method. By now it seems the new one works better. By the way more tests are required. #slope_biggest = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[index_max_area], # denoised, contours[index_max_area]) - if np.abs(slope_biggest) > 20: - slope_biggest = 0 + #if np.abs(slope_biggest) > 20: + # slope_biggest = 0 self.slopes = [] - for mv in range(len(self.all_text_region_raw)): - img_int_p=self.all_text_region_raw[mv] + for mv in range(len(self.boxes)): + textline_mask_tot = cv2.erode(textline_mask_tot, self.kernel, iterations=1) + + crop_img, _ = self.crop_image_inside_box(self.boxes[mv], + np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) try: - textline_con=self.return_contours_of_image(img_int_p) - textline_con_fil=self.filter_contours_area_of_image(img_int_p,textline_con,denoised,max_area=1,min_area=0.0008) + textline_con=self.return_contours_of_image(crop_img) + textline_con_fil=self.filter_contours_area_of_image(crop_img,textline_con,denoised,max_area=1,min_area=0.0008) y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) sigma_des=int( y_diff_mean * (4./40.0) ) @@ -1554,22 +1558,22 @@ class textlineerkenner: if sigma_des<1: sigma_des=1 - img_int_p[img_int_p>0]=1 - slope_for_all=self.return_deskew_slop(img_int_p,sigma_des) + crop_img[crop_img>0]=1 + slope_corresponding_textregion=self.return_deskew_slop(crop_img,sigma_des) #old method - #slope_for_all=self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv],denoised,contours[mv]) + #slope_corresponding_textregion=self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv],denoised,contours[mv]) #text_patch_processed=textline_contours_postprocessing(gada) except: - slope_for_all=999 + slope_corresponding_textregion=999 - if np.abs(slope_for_all)>12.5 and slope_for_all!=999: - slope_for_all=slope_biggest - elif slope_for_all==999: - slope_for_all=slope_biggest - self.slopes.append(slope_for_all) + if np.abs(slope_corresponding_textregion)>12.5 and slope_corresponding_textregion!=999: + slope_corresponding_textregion=slope_biggest + elif slope_corresponding_textregion==999: + slope_corresponding_textregion=slope_biggest + self.slopes.append(slope_corresponding_textregion) def order_of_regions(self, textline_mask,contours_main): @@ -1687,17 +1691,19 @@ class textlineerkenner: order_of_texts return order_of_texts, id_of_texts - def deskew_textline_patches(self, contours, boxes): + def deskew_textline_patches(self, contours,textline_mask_tot): self.all_text_region_processed = [] self.all_found_texline_polygons = [] - denoised=None - - for jj in range(len(self.all_text_region_raw)): - text_patch_processed, cnt_clean_rot = self.textline_contours_postprocessing(self.all_text_region_raw[jj] - , 
denoised, self.slopes[jj], - contours[jj], boxes[jj]) + for jj in range(len(self.boxes)): + + crop_img, _ = self.crop_image_inside_box(self.boxes[jj], + np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) + + text_patch_processed, cnt_clean_rot = self.textline_contours_postprocessing(crop_img[:,:,0] + , self.slopes[jj], + contours[jj], self.boxes[jj]) self.all_text_region_processed.append(text_patch_processed) text_patch_processed = text_patch_processed.astype(np.uint8) @@ -1826,6 +1832,10 @@ class textlineerkenner: coord = ET.SubElement(textline, 'Coords') + texteq=ET.SubElement(textline, 'TextEquiv') + + uni=ET.SubElement(texteq, 'Unicode') + uni.text = ' ' #points = ET.SubElement(coord, 'Points') @@ -1855,6 +1865,10 @@ class textlineerkenner: #print(points_co) coord.set('points',points_co) + texteqreg=ET.SubElement(textregion, 'TextEquiv') + + unireg=ET.SubElement(texteqreg, 'Unicode') + unireg.text = ' ' @@ -1867,35 +1881,47 @@ class textlineerkenner: def run(self): #get image and scales, then extract the page of the scanned image + t1=time.time() self.get_image_and_scales() image_page,page_coord=self.extract_page() + ########## K.clear_session() gc.collect() + t2=time.time() + # extract text regions and corresponding contours and surrounding box text_regions=self.extract_text_regions(image_page) - boxes,contours=self.get_text_region_contours_and_boxes(text_regions) + contours=self.get_text_region_contours_and_boxes(text_regions) + + ########## K.clear_session() gc.collect() + t3=time.time() + + if len(contours)>0: - self.get_all_image_patches_based_on_text_regions(boxes,image_page) + self.get_all_image_patches_coordination(image_page) ########## gc.collect() # extracting textlines using segmentation textline_mask_tot=self.textline_contours(image_page) - + #plt.imshow(textline_mask_tot) + #plt.show() ########## K.clear_session() gc.collect() + t4=time.time() + # get orders of each textregion. This method currently only works for one-column documents. indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours) order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted ) @@ -1903,24 +1929,31 @@ class textlineerkenner: ########## gc.collect() - + t5=time.time() + # just get the textline result for each box of text regions - self.get_textlines_for_each_textregions(textline_mask_tot,boxes) + #self.get_textlines_for_each_textregions(textline_mask_tot) ########## - gc.collect() + # calculate the slope for deskewing for each box of text region. - self.get_slopes_for_each_text_region(contours) + self.get_slopes_for_each_text_region(contours,textline_mask_tot) + ########## gc.collect() + t6=time.time() + # do deskewing for each box of text region.
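One pattern worth noting in the rewritten run() above (and continued just below): every pipeline stage is bracketed by K.clear_session() and/or gc.collect() plus a t1…t7 timestamp, so each model's TensorFlow graph is dropped before the next one loads. A small helper expressing the same bookkeeping; the helper itself is a sketch of ours, not part of the patch:

```
import gc
import time

from keras import backend as K

def timed_stage(label, stage_fn, *args, clear_session=False):
    # Run one pipeline stage, optionally drop the Keras/TF graph so the
    # next model starts from a clean session, force a GC pass, and report
    # the elapsed time -- mirroring the bookkeeping in run().
    t0 = time.time()
    result = stage_fn(*args)
    if clear_session:
        K.clear_session()
    gc.collect()
    print("time needed for " + label + " = " + "{0:.2f}".format(time.time() - t0))
    return result
```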
- self.deskew_textline_patches(contours, boxes) + self.deskew_textline_patches(contours,textline_mask_tot) ########## gc.collect() + + t7=time.time() + else: contours=[] order_of_texts=None @@ -1929,7 +1962,16 @@ class textlineerkenner: # Destroy the current Keras session/graph to free memory K.clear_session() + + print( "time total = "+"{0:.2f}".format(time.time()-t1) ) + print( "time needed for page extraction = "+"{0:.2f}".format(t2-t1) ) + print( "time needed for text region extraction and get contours = "+"{0:.2f}".format(t3-t2) ) + print( "time needed for textlines = "+"{0:.2f}".format(t4-t3) ) + print( "time needed to get order of regions = "+"{0:.2f}".format(t5-t4) ) + print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t6-t5) ) + print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) + @click.command() @click.option('--image', '-i', help='image filename', type=click.Path(exists=True, dir_okay=False)) From 6714481556f020a1ae6ae99cc5233de591e5571a Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 31 Oct 2019 10:54:57 +0100 Subject: [PATCH 28/47] Update main.py --- qurator/sbb_textline_detector/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 22c123a..3fc0cd3 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -24,6 +24,7 @@ from scipy.ndimage import gaussian_filter1d import xml.etree.ElementTree as ET import warnings import click +import time with warnings.catch_warnings(): warnings.simplefilter("ignore") From 8c954a6c7abef7d6deb0c48138f32556154ada97 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 31 Oct 2019 17:08:35 +0100 Subject: [PATCH 29/47] Update main.py --- qurator/sbb_textline_detector/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 3fc0cd3..406b38f 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1550,6 +1550,7 @@ class textlineerkenner: np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) try: + denoised=None textline_con=self.return_contours_of_image(crop_img) textline_con_fil=self.filter_contours_area_of_image(crop_img,textline_con,denoised,max_area=1,min_area=0.0008) y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) From 9f97f34255ee0d9b62381c1d61a3ff71e6e7d6c7 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 31 Oct 2019 17:36:21 +0100 Subject: [PATCH 30/47] Update main.py --- qurator/sbb_textline_detector/main.py | 158 +++++--------------------- 1 file changed, 27 insertions(+), 131 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 406b38f..860a552 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -264,37 +264,7 @@ class textlineerkenner: iou = tf.gather(iou, indices=tf.where(legal_labels)) return K.mean(iou) - def IoU(self, Yi, y_predi): - ## mean Intersection over Union - ## Mean IoU = TP/(FN + TP + FP) - - IoUs = [] - Nclass = np.unique(Yi) - for c in Nclass: - TP = np.sum((Yi == c) & (y_predi == c)) - FP = np.sum((Yi != c) & (y_predi == c)) - FN = np.sum((Yi == c) & (y_predi != c)) - IoU = TP / float(TP + FP + FN) - print("class {:02.0f}: #TP={:6.0f}, #FP={:6.0f}, #FN={:5.0f}, IoU={:4.3f}".format(c, TP, FP, FN, IoU)) - IoUs.append(IoU) - mIoU = np.mean(IoUs) - 
print("_________________") - print("Mean IoU: {:4.3f}".format(mIoU)) - return mIoU - - def IoU_case(self, Yi, y_predi, n_classes): - ## mean Intersection over Union - ## Mean IoU = TP/(FN + TP + FP) - - IoUs = [] - - Nclass = n_classes - for c in range(Nclass): - TP = np.sum((Yi == c) & (y_predi == c)) - FP = np.sum((Yi != c) & (y_predi == c)) - FN = np.sum((Yi == c) & (y_predi != c)) - IoUs.append(np.array([TP, FP, FN])) - return IoUs + def color_images(self, seg, n_classes): ann_u = range(n_classes) @@ -421,7 +391,7 @@ class textlineerkenner: img = cv2.GaussianBlur(img, (15, 15), 0) - img = img / 255.0 + img = img /float( 255.0) img = self.resize_image(img, img_height_page, img_width_page) label_p_pred = model_page.predict( @@ -488,14 +458,14 @@ class textlineerkenner: img = self.otsu_copy(img) img = img.astype(np.uint8) - img = cv2.medianBlur(img,5) + ##img = cv2.medianBlur(img,5) # img = cv2.medianBlur(img,5) # img=cv2.bilateralFilter(img,9,75,75) # img=cv2.bilateralFilter(img,9,75,75) - img = img / 255.0 + img = img / float(255.0) img_h = img.shape[0] img_w = img.shape[1] @@ -648,6 +618,8 @@ class textlineerkenner: _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + # commenst_contours=self.filter_contours_area_of_image(thresh,contours,hirarchy,max_area=0.0002,min_area=0.0001) main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.00001) @@ -697,7 +669,7 @@ class textlineerkenner: if img.shape[1] < img_width_textline: img = cv2.resize(img, (img_height_textline, img.shape[0]), interpolation=cv2.INTER_NEAREST) - margin = False + margin = True if not margin: width = img_width_textline @@ -706,8 +678,8 @@ class textlineerkenner: img = self.otsu_copy(img) img = img.astype(np.uint8) # for _ in range(4): - img = cv2.medianBlur(img,5) - img = img / 255.0 + #img = cv2.medianBlur(img,5) + img = img / float(255.0) img_h = img.shape[0] img_w = img.shape[1] @@ -770,7 +742,7 @@ class textlineerkenner: img = self.otsu_copy(img) img = img.astype(np.uint8) - img = img / 255.0 + img = img /float( 255.0) img_h = img.shape[0] img_w = img.shape[1] @@ -1205,11 +1177,8 @@ class textlineerkenner: [int(x_max), int(point_down)], [int(x_min), int(point_down)]])) - mada_new = np.zeros((img_path.shape[0], img_path.shape[1], 3)) - mada_new = cv2.fillPoly(mada_new, pts=textline_boxes, color=(255, 255, 255)) - mada_new = mada_new.astype(np.uint8) - return mada_new, peaks, textline_boxes_rot + return peaks, textline_boxes_rot def ruturn_rotated_contours(self,slope,img_patch): dst = self.rotate_image(img_patch, slope) @@ -1226,6 +1195,7 @@ class textlineerkenner: return contours def textline_contours_postprocessing(self, textline_mask, slope, contour_text_interest, box_ind): + textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 @@ -1234,33 +1204,9 @@ class textlineerkenner: textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel) textline_mask = cv2.erode(textline_mask, kernel, iterations=1) - imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY) - - _, thresh = cv2.threshold(imgray, 0, 255, 0) - - thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel) - thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel) - - contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - commenst_contours = 
self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=0.01, - min_area=0.003) - main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.0003) - - - img_comm = np.zeros(thresh.shape) - img_comm_in = cv2.fillPoly(img_comm, pts=main_contours, color=(255, 255, 255)) - - img_comm_in = np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2) - img_comm_in = img_comm_in.astype(np.uint8) - - imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) - - _, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + - if len(contours) > 0: + try: dst = self.rotate_image(textline_mask, slope) dst = dst[:, :, 0] @@ -1289,31 +1235,14 @@ class textlineerkenner: - sep_img, _, contours_rotated_clean = self.seperate_lines(dst, contours_text_rot[ind_big_con], slope) - - - dst = self.rotate_image(sep_img, -slope) + _, contours_rotated_clean = self.seperate_lines(dst, contours_text_rot[ind_big_con], slope) - imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY) - - _, thresh = cv2.threshold(imgray, 0, 255, 0) - - thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel) - thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel) - - found_polygons, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - img_in = np.zeros(textline_mask.shape) - img_p_in = cv2.fillPoly(img_in, pts=found_polygons, color=(255, 255, 255)) - else: - - img_in = np.zeros(textline_mask.shape) + except: - img_p_in = cv2.fillPoly(img_in, pts=commenst_contours, color=(255, 255, 255)) - img_p_in = cv2.dilate(img_p_in, kernel, iterations=1) contours_rotated_clean = [] - return img_p_in, contours_rotated_clean + return contours_rotated_clean def textline_contours_to_get_slope_correctly(self, textline_mask, img_patch, contour_interest): @@ -1338,7 +1267,7 @@ class textlineerkenner: textline_maskt = textline_mask[:, :, 0] textline_maskt[textline_maskt != 0] = 1 - _, peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new) + peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new) mean_dis = np.mean(np.diff(peaks_point)) @@ -1514,47 +1443,23 @@ class textlineerkenner: def get_slopes_for_each_text_region(self, contours,textline_mask_tot): - # first lets find slope for biggest patch of text region (slope of deskewing) - - - #index_max_area = np.argmax(self.area_of_cropped) - #img_int_p=self.all_text_region_raw[index_max_area] - #textline_con=self.return_contours_of_image(img_int_p) - #textline_con_fil=self.filter_contours_area_of_image(img_int_p,textline_con,denoised,max_area=1,min_area=0.0008) - #y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) - - #sigma_des=int( y_diff_mean * (4./40.0) ) - #refrence : sigma =4 for diff=40 - #if sigma_des<1: - # sigma_des=1 - - - #img_int_p[img_int_p>0]=1 - - slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des) - - # this was the old method. By now it seems the new one works better. By the way more tests are required. 
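The try/except rewrite above boils textline postprocessing down to: rotate the line mask by the estimated slope, take the contours of the dominant rotated region, split it into lines, and hand the cleaned contours back. Below is a self-contained sketch of the rotate-then-extract step, assuming a single-channel uint8 mask; it deliberately skips the cvtColor call seen in ruturn_rotated_contours, which expects a 3-channel input and would not accept the single-channel dst as written there:

```
import cv2
import numpy as np

def rotated_contours(mask, slope):
    # Rotate a binary textline mask by the estimated slope and return the
    # external contours of the result -- the idea behind the
    # ruturn_rotated_contours/textline_contours_postprocessing pair above.
    h, w = mask.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), slope, 1.0)
    rotated = cv2.warpAffine(mask, M, (w, h))

    rotated = (rotated != 0).astype(np.uint8) * 255  # re-binarize after interpolation
    _, thresh = cv2.threshold(rotated, 0, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return contours
```

Mapping the resulting line polygons back into page coordinates is then a matter of applying the inverse rotation to each point, which is essentially what the x_min_rot1/point_up_rot1 arithmetic in seperate_lines does.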
- #slope_biggest = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[index_max_area], - # denoised, contours[index_max_area]) - - - #if np.abs(slope_biggest) > 20: - # slope_biggest = 0 self.slopes = [] for mv in range(len(self.boxes)): - textline_mask_tot = cv2.erode(textline_mask_tot, self.kernel, iterations=1) + crop_img, _ = self.crop_image_inside_box(self.boxes[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) + crop_img=crop_img[:,:,0] + crop_img=cv2.erode(crop_img,self.kernel,iterations = 1) try: - denoised=None + hierachy=None textline_con=self.return_contours_of_image(crop_img) - textline_con_fil=self.filter_contours_area_of_image(crop_img,textline_con,denoised,max_area=1,min_area=0.0008) + textline_con_fil=self.filter_contours_area_of_image(crop_img,textline_con,hierachy,max_area=1,min_area=0.0008) y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) - + sigma_des=int( y_diff_mean * (4./40.0) ) if sigma_des<1: @@ -1562,10 +1467,7 @@ class textlineerkenner: crop_img[crop_img>0]=1 slope_corresponding_textregion=self.return_deskew_slop(crop_img,sigma_des) - - #old method - #slope_corresponding_textregion=self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv],denoised,contours[mv]) - #text_patch_processed=textline_contours_postprocessing(gada) + except: slope_corresponding_textregion=999 @@ -1703,19 +1605,12 @@ class textlineerkenner: crop_img, _ = self.crop_image_inside_box(self.boxes[jj], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) - text_patch_processed, cnt_clean_rot = self.textline_contours_postprocessing(crop_img[:,:,0] + cnt_clean_rot = self.textline_contours_postprocessing(crop_img[:,:,0] , self.slopes[jj], contours[jj], self.boxes[jj]) - self.all_text_region_processed.append(text_patch_processed) - text_patch_processed = text_patch_processed.astype(np.uint8) - imgray = cv2.cvtColor(text_patch_processed, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - - self.found_polygons, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) self.all_found_texline_polygons.append(cnt_clean_rot) - def write_into_page_xml(self,contours,page_coord,dir_of_image,order_of_texts , id_of_texts): @@ -1973,6 +1868,7 @@ class textlineerkenner: print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t6-t5) ) print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) + @click.command() From 2d6dd92b310704fe536fc6ebbfa2fdd083c946e5 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Mon, 4 Nov 2019 11:10:17 +0100 Subject: [PATCH 31/47] Update main.py --- qurator/sbb_textline_detector/main.py | 799 +++++++------------------- 1 file changed, 219 insertions(+), 580 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 860a552..54953f5 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -25,6 +25,7 @@ import xml.etree.ElementTree as ET import warnings import click import time +from multiprocessing import Process, Queue, cpu_count with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -78,7 +79,7 @@ class textlineerkenner: polygon = geometry.Polygon([point[0] for point in c]) area = polygon.area if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod( - image.shape[:2]): # and hirarchy[0][jv][3]==-1 : + image.shape[:2]) and hirarchy[0][jv][3] == -1 : # and hirarchy[0][jv][3]==-1 : found_polygons_early.append( 
np.array([ [point] for point in polygon.exterior.coords], dtype=np.uint)) jv += 1 @@ -115,156 +116,6 @@ class textlineerkenner: seg_f[:, :, j] = (seg == j).astype(int) return seg_f - def jaccard_distance_loss(self, y_true, y_pred, smooth=100): - """ - Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) - = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|)) - - The jaccard distance loss is usefull for unbalanced datasets. This has been - shifted so it converges on 0 and is smoothed to avoid exploding or disapearing - gradient. - - Ref: https://en.wikipedia.org/wiki/Jaccard_index - - @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96 - @author: wassname - """ - intersection = K.sum(K.abs(y_true * y_pred), axis=-1) - sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1) - jac = (intersection + smooth) / (sum_ - intersection + smooth) - return (1 - jac) * smooth - - def soft_dice_loss(self, y_true, y_pred, epsilon=1e-6): - ''' - Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions. - Assumes the `channels_last` format. - - # Arguments - y_true: b x X x Y( x Z...) x c One hot encoding of ground truth - y_pred: b x X x Y( x Z...) x c Network output, must sum to 1 over c channel (such as after softmax) - epsilon: Used for numerical stability to avoid divide by zero errors - - # References - V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation - https://arxiv.org/abs/1606.04797 - More details on Dice loss formulation - https://mediatum.ub.tum.de/doc/1395260/1395260.pdf (page 72) - - Adapted from https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022 - ''' - - # skip the batch and class axis for calculating Dice score - axes = tuple(range(1, len(y_pred.shape) - 1)) - - numerator = 2. 
* K.sum(y_pred * y_true, axes) - - denominator = K.sum(K.square(y_pred) + K.square(y_true), axes) - return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch - - def weighted_categorical_crossentropy(self, weights=None): - """ weighted_categorical_crossentropy - - Args: - * weights: crossentropy weights - Returns: - * weighted categorical crossentropy function - """ - - def loss(y_true, y_pred): - labels_floats = tf.cast(y_true, tf.float32) - per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats, logits=y_pred) - - if weights is not None: - weight_mask = tf.maximum(tf.reduce_max(tf.constant( - np.array(weights, dtype=np.float32)[None, None, None]) - * labels_floats, axis=-1), 1.0) - per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] - return tf.reduce_mean(per_pixel_loss) - - return loss - - def seg_metrics(self, y_true, y_pred, metric_name, metric_type='standard', drop_last=True, mean_per_class=False, - verbose=False): - flag_soft = (metric_type == 'soft') - flag_naive_mean = (metric_type == 'naive') - - # always assume one or more classes - num_classes = K.shape(y_true)[-1] - - if not flag_soft: - # get one-hot encoded masks from y_pred (true masks should already be one-hot) - y_pred = K.one_hot(K.argmax(y_pred), num_classes) - y_true = K.one_hot(K.argmax(y_true), num_classes) - - # if already one-hot, could have skipped above command - # keras uses float32 instead of float64, would give error down (but numpy arrays or keras.to_categorical gives float64) - y_true = K.cast(y_true, 'float32') - y_pred = K.cast(y_pred, 'float32') - - # intersection and union shapes are batch_size * n_classes (values = area in pixels) - axes = (1, 2) # W,H axes of each image - intersection = K.sum(K.abs(y_true * y_pred), axis=axes) - mask_sum = K.sum(K.abs(y_true), axis=axes) + K.sum(K.abs(y_pred), axis=axes) - union = mask_sum - intersection # or, np.logical_or(y_pred, y_true) for one-hot - - smooth = .001 - iou = (intersection + smooth) / (union + smooth) - dice = 2 * (intersection + smooth) / (mask_sum + smooth) - - metric = {'iou': iou, 'dice': dice}[metric_name] - - # define mask to be 0 when no pixels are present in either y_true or y_pred, 1 otherwise - mask = K.cast(K.not_equal(union, 0), 'float32') - - if drop_last: - metric = metric[:, :-1] - mask = mask[:, :-1] - - if verbose: - print('intersection, union') - print(K.eval(intersection), K.eval(union)) - print(K.eval(intersection / union)) - - # return mean metrics: remaining axes are (batch, classes) - if flag_naive_mean: - return K.mean(metric) - - # take mean only over non-absent classes - class_count = K.sum(mask, axis=0) - non_zero = tf.greater(class_count, 0) - non_zero_sum = tf.boolean_mask(K.sum(metric * mask, axis=0), non_zero) - non_zero_count = tf.boolean_mask(class_count, non_zero) - - if verbose: - print('Counts of inputs with class present, metrics for non-absent classes') - print(K.eval(class_count), K.eval(non_zero_sum / non_zero_count)) - - return K.mean(non_zero_sum / non_zero_count) - - def mean_iou(self, y_true, y_pred, **kwargs): - return self.seg_metrics(y_true, y_pred, metric_name='iou', **kwargs) - - def Mean_IOU(self, y_true, y_pred): - nb_classes = K.int_shape(y_pred)[-1] - iou = [] - true_pixels = K.argmax(y_true, axis=-1) - pred_pixels = K.argmax(y_pred, axis=-1) - void_labels = K.equal(K.sum(y_true, axis=-1), 0) - for i in range(0, nb_classes): # exclude first label (background) and last label (void) - true_labels = K.equal(true_pixels, i) # & 
~void_labels - pred_labels = K.equal(pred_pixels, i) # & ~void_labels - inter = tf.to_int32(true_labels & pred_labels) - union = tf.to_int32(true_labels | pred_labels) - legal_batches = K.sum(tf.to_int32(true_labels), axis=1) > 0 - ious = K.sum(inter, axis=1) / K.sum(union, axis=1) - iou.append( - K.mean(tf.gather(ious, indices=tf.where(legal_batches)))) # returns average IoU of the same objects - iou = tf.stack(iou) - legal_labels = ~tf.debugging.is_nan(iou) - iou = tf.gather(iou, indices=tf.where(legal_labels)) - return K.mean(iou) - - def color_images(self, seg, n_classes): ann_u = range(n_classes) @@ -342,7 +193,7 @@ class textlineerkenner: self.width_org = self.image.shape[1] if self.image.shape[0] < 1000: - self.img_hight_int = 1800 + self.img_hight_int = 2800 self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) elif self.image.shape[0] < 2000 and self.image.shape[0] >= 1000: @@ -350,11 +201,11 @@ class textlineerkenner: self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) elif self.image.shape[0] < 3000 and self.image.shape[0] >= 2000: - self.img_hight_int = 4000 + self.img_hight_int = 5500 self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3000: - self.img_hight_int = 4500 + self.img_hight_int = 6500 self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) else: @@ -371,99 +222,23 @@ class textlineerkenner: config.gpu_options.allow_growth = True session = tf.InteractiveSession() - model = load_model(model_dir, custom_objects={'mean_iou': self.mean_iou, - 'soft_dice_loss': self.soft_dice_loss, - 'jaccard_distance_loss': self.jaccard_distance_loss, - 'Mean_IOU': self.Mean_IOU}) + model = load_model(model_dir, compile=False) return model, session - - def extract_page(self): - model_page, session_page = self.start_new_session_and_model(self.model_page_dir) - - img_height_page = model_page.layers[len(model_page.layers) - 1].output_shape[1] - img_width_page = model_page.layers[len(model_page.layers) - 1].output_shape[2] - n_classes_page = model_page.layers[len(model_page.layers) - 1].output_shape[3] - - img = self.otsu_copy(self.image) - - for ii in range(1): - img = cv2.GaussianBlur(img, (15, 15), 0) - - - img = img /float( 255.0) - img = self.resize_image(img, img_height_page, img_width_page) - - label_p_pred = model_page.predict( - img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) - - seg = np.argmax(label_p_pred, axis=3)[0] - seg_color = self.color_images(seg, n_classes_page) - imgs = self.resize_image(seg_color, self.image.shape[0], self.image.shape[1]) - - - imgs = imgs.astype(np.uint8) - imgray = cv2.cvtColor(imgs, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - - thresh = cv2.dilate(thresh, self.kernel, iterations=3) - contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - - cnt = contours[np.argmax(cnt_size)] - - x, y, w, h = cv2.boundingRect(cnt) - - box = [x, y, w, h] - - croped_page, page_coord = self.crop_image_inside_box(box, self.image) + + def do_prediction(self,patches,img,model): - self.cont_page=[] - self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , - [ page_coord[3] , page_coord[0] ] , - [ page_coord[3] , page_coord[1] ] , - [ page_coord[2] , page_coord[1] ]] ) ) - - 
session_page.close() - del model_page - del session_page - del self.image - del seg - del contours - del thresh - del imgs - del img + img_height_model = model.layers[len(model.layers) - 1].output_shape[1] + img_width_model = model.layers[len(model.layers) - 1].output_shape[2] + n_classes = model.layers[len(model.layers) - 1].output_shape[3] - gc.collect() - return croped_page, page_coord + if patches: - def extract_text_regions(self, img): - model_region, session_region = self.start_new_session_and_model(self.model_region_dir) + margin = int(0.1 * img_width_model) - img_height_region = model_region.layers[len(model_region.layers) - 1].output_shape[1] - img_width_region = model_region.layers[len(model_region.layers) - 1].output_shape[2] - n_classes = model_region.layers[len(model_region.layers) - 1].output_shape[3] - margin = True - if margin: + width_mid = img_width_model - 2 * margin + height_mid = img_height_model - 2 * margin - width = img_width_region - height = img_height_region - - # offset=int(.1*width) - offset = int(0.1 * width) - - width_mid = width - 2 * offset - height_mid = height - 2 * offset - - img = self.otsu_copy(img) - img = img.astype(np.uint8) - ##img = cv2.medianBlur(img,5) - - # img = cv2.medianBlur(img,5) - - # img=cv2.bilateralFilter(img,9,75,75) - # img=cv2.bilateralFilter(img,9,75,75) img = img / float(255.0) @@ -490,30 +265,30 @@ class textlineerkenner: if i == 0: index_x_d = i * width_mid - index_x_u = index_x_d + width # (i+1)*width + index_x_u = index_x_d + img_width_model elif i > 0: index_x_d = i * width_mid - index_x_u = index_x_d + width # (i+1)*width + index_x_u = index_x_d + img_width_model if j == 0: index_y_d = j * height_mid - index_y_u = index_y_d + height # (j+1)*height + index_y_u = index_y_d + img_height_model elif j > 0: index_y_d = j * height_mid - index_y_u = index_y_d + height # (j+1)*height + index_y_u = index_y_d + img_height_model if index_x_u > img_w: index_x_u = img_w - index_x_d = img_w - width + index_x_d = img_w - img_width_model if index_y_u > img_h: index_y_u = img_h - index_y_d = img_h - height + index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model_region.predict( + label_p_pred = model.predict( img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) seg = np.argmax(label_p_pred, axis=3)[0] @@ -521,132 +296,179 @@ class textlineerkenner: seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) if i==0 and j==0: - seg_color = seg_color[0:seg_color.shape[0] - offset, 0:seg_color.shape[1] - offset, :] - seg = seg[0:seg.shape[0] - offset, 0:seg.shape[1] - offset] + seg_color = seg_color[0:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] + seg = seg[0:seg.shape[0] - margin, 0:seg.shape[1] - margin] - mask_true[index_y_d + 0:index_y_u - offset, index_x_d + 0:index_x_u - offset] = seg - prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + 0:index_x_u - offset, + mask_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color elif i==nxf-1 and j==nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - 0, offset:seg_color.shape[1] - 0, :] - seg = seg[offset:seg.shape[0] - 0, offset:seg.shape[1] - 0] + seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - 0, :] + seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - 0] - mask_true[index_y_d + offset:index_y_u - 0, index_x_d + 
offset:index_x_u - 0] = seg - prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - 0, + mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0] = seg + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - 0, :] = seg_color elif i==0 and j==nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - 0, 0:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - 0, 0:seg.shape[1] - offset] + seg_color = seg_color[margin:seg_color.shape[0] - 0, 0:seg_color.shape[1] - margin, :] + seg = seg[margin:seg.shape[0] - 0, 0:seg.shape[1] - margin] - mask_true[index_y_d + offset:index_y_u - 0, index_x_d + 0:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + 0:index_x_u - offset, + mask_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin] = seg + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + 0:index_x_u - margin, :] = seg_color elif i==nxf-1 and j==0: - seg_color = seg_color[0:seg_color.shape[0] - offset, offset:seg_color.shape[1] - 0, :] - seg = seg[0:seg.shape[0] - offset, offset:seg.shape[1] - 0] + seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :] + seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - 0] - mask_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - 0] = seg - prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - 0, + mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color elif i==0 and j!=0 and j!=nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - offset, 0:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - offset, 0:seg.shape[1] - offset] + seg_color = seg_color[margin:seg_color.shape[0] - margin, 0:seg_color.shape[1] - margin, :] + seg = seg[margin:seg.shape[0] - margin, 0:seg.shape[1] - margin] - mask_true[index_y_d + offset:index_y_u - offset, index_x_d + 0:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + 0:index_x_u - offset, + mask_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin] = seg + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + 0:index_x_u - margin, :] = seg_color elif i==nxf-1 and j!=0 and j!=nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - 0, :] - seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - 0] + seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - 0, :] + seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - 0] - mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - 0] = seg - prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - 0, + mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0] = seg + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - 0, :] = seg_color elif i!=0 and i!=nxf-1 and j==0: - seg_color = seg_color[0:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] - seg = seg[0:seg.shape[0] - offset, offset:seg.shape[1] - offset] + seg_color = seg_color[0:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :] + seg = seg[0:seg.shape[0] - margin, margin:seg.shape[1] - margin] - mask_true[index_y_d + 0:index_y_u - offset, index_x_d 
+ offset:index_x_u - offset] = seg - prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - offset, + mask_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg + prediction_true[index_y_d + 0:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color elif i!=0 and i!=nxf-1 and j==nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - 0, offset:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - 0, offset:seg.shape[1] - offset] + seg_color = seg_color[margin:seg_color.shape[0] - 0, margin:seg_color.shape[1] - margin, :] + seg = seg[margin:seg.shape[0] - 0, margin:seg.shape[1] - margin] - mask_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - offset, + mask_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin] = seg + prediction_true[index_y_d + margin:index_y_u - 0, index_x_d + margin:index_x_u - margin, :] = seg_color else: - seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - offset] + seg_color = seg_color[margin:seg_color.shape[0] - margin, margin:seg_color.shape[1] - margin, :] + seg = seg[margin:seg.shape[0] - margin, margin:seg.shape[1] - margin] - mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset, + mask_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin] = seg + prediction_true[index_y_d + margin:index_y_u - margin, index_x_d + margin:index_x_u - margin, :] = seg_color prediction_true = prediction_true.astype(np.uint8) - session_region.close() + + if not patches: - del model_region - del session_region - gc.collect() - return prediction_true + img = img /float( 255.0) + img = self.resize_image(img, img_height_model, img_width_model) - def get_text_region_contours_and_boxes(self, image): - rgb_class = (1, 1, 1) - mask = np.all(image == rgb_class, axis=-1) + label_p_pred = model.predict( + img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) - image = np.repeat(mask[:, :, np.newaxis], 3, axis=2) * 255 - image = image.astype(np.uint8) + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color =np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = self.resize_image(seg_color, self.image.shape[0], self.image.shape[1]) + prediction_true = prediction_true.astype(np.uint8) + return prediction_true + + - image = cv2.morphologyEx(image, cv2.MORPH_OPEN, self.kernel) - image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, self.kernel) - #image = cv2.erode(image,self.kernel,iterations = 2) + def extract_page(self): + patches=False + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + img = self.otsu_copy(self.image) + for ii in range(1): + img = cv2.GaussianBlur(img, (15, 15), 0) - # image = cv2.dilate(image,self.kernel,iterations = 3) + + img_page_prediction=self.do_prediction(patches,img,model_page) + + imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) - imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + thresh = cv2.dilate(thresh, self.kernel, iterations=3) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - _, thresh = cv2.threshold(imgray, 0, 255, 0) + cnt_size = 
np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + cnt = contours[np.argmax(cnt_size)] + + x, y, w, h = cv2.boundingRect(cnt) + + box = [x, y, w, h] + + croped_page, page_coord = self.crop_image_inside_box(box, self.image) + self.cont_page=[] + self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , + [ page_coord[3] , page_coord[0] ] , + [ page_coord[3] , page_coord[1] ] , + [ page_coord[2] , page_coord[1] ]] ) ) + session_page.close() + del model_page + del session_page + del self.image + del contours + del thresh + del img - # commenst_contours=self.filter_contours_area_of_image(thresh,contours,hirarchy,max_area=0.0002,min_area=0.0001) - main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.00001) + gc.collect() + return croped_page, page_coord + + def extract_text_regions(self, img): + + patches=True + model_region, session_region = self.start_new_session_and_model(self.model_region_dir) + img = self.otsu_copy(img) + img = img.astype(np.uint8) + + prediction_regions=self.do_prediction(patches,img,model_region) + + session_region.close() + del model_region + del session_region + gc.collect() + return prediction_regions - img_comm = np.zeros(thresh.shape) - img_comm_in = cv2.fillPoly(img_comm, pts=main_contours, color=(255, 255, 255)) + def get_text_region_contours_and_boxes(self, image): + rgb_class_of_texts = (1, 1, 1) + mask_texts = np.all(image == rgb_class_of_texts, axis=-1) - img_comm_in = np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2) + image = np.repeat(mask_texts[:, :, np.newaxis], 3, axis=2) * 255 + image = image.astype(np.uint8) - img_comm_in = img_comm_in.astype(np.uint8) - # img_comm_in_de=self.deskew_images(img_comm_in) + image = cv2.morphologyEx(image, cv2.MORPH_OPEN, self.kernel) + image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, self.kernel) - imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) + imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.00001) self.boxes = [] - contours_new = [] - for jj in range(len(contours)): - if hirarchy[0][jj][2] == -1: - x, y, w, h = cv2.boundingRect(contours[jj]) - self.boxes.append([x, y, w, h]) - contours_new.append(contours[jj]) + + for jj in range(len(main_contours)): + x, y, w, h = cv2.boundingRect(main_contours[jj]) + self.boxes.append([x, y, w, h]) + - return contours_new + return main_contours def get_all_image_patches_coordination(self, image_page): self.all_box_coord=[] @@ -656,226 +478,19 @@ class textlineerkenner: def textline_contours(self, img): + patches=True model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) - img_height_textline = model_textline.layers[len(model_textline.layers) - 1].output_shape[1] - img_width_textline = model_textline.layers[len(model_textline.layers) - 1].output_shape[2] - n_classes = model_textline.layers[len(model_textline.layers) - 1].output_shape[3] - - img_org = img.copy() - - if img.shape[0] < img_height_textline: - img = cv2.resize(img, (img.shape[1], img_width_textline), 
interpolation=cv2.INTER_NEAREST) - - if img.shape[1] < img_width_textline: - img = cv2.resize(img, (img_height_textline, img.shape[0]), interpolation=cv2.INTER_NEAREST) - - margin = True - if not margin: - - width = img_width_textline - height = img_height_textline - - img = self.otsu_copy(img) - img = img.astype(np.uint8) - # for _ in range(4): - #img = cv2.medianBlur(img,5) - img = img / float(255.0) - - img_h = img.shape[0] - img_w = img.shape[1] - - prediction_true = np.zeros((img_h, img_w, 3)) - mask_true = np.zeros((img_h, img_w)) - nxf = img_w / float(width) - nyf = img_h / float(height) - - if nxf > int(nxf): - nxf = int(nxf) + 1 - else: - nxf = int(nxf) - - if nyf > int(nyf): - nyf = int(nyf) + 1 - else: - nyf = int(nyf) - - for i in range(nxf): - for j in range(nyf): - index_x_d = i * width - index_x_u = (i + 1) * width - - index_y_d = j * height - index_y_u = (j + 1) * height - - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - width - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - height - - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - - label_p_pred = model_textline.predict( - img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) - seg = np.argmax(label_p_pred, axis=3)[0] - seg_color = self.color_images(seg, n_classes) - mask_true[index_y_d:index_y_u, index_x_d:index_x_u] = seg - prediction_true[index_y_d:index_y_u, index_x_d:index_x_u, :] = seg_color - - y_predi = mask_true - y_predi = cv2.resize(y_predi, (img_org.shape[1], img_org.shape[0]), interpolation=cv2.INTER_NEAREST) - - - - if margin: - - width = img_width_textline - height = img_height_textline - - # offset=int(.1*width) - offset = int(0.1 * width) - - width_mid = width - 2 * offset - height_mid = height - 2 * offset - - img = self.otsu_copy(img) - img = img.astype(np.uint8) - - img = img /float( 255.0) - - img_h = img.shape[0] - img_w = img.shape[1] - - prediction_true = np.zeros((img_h, img_w, 3)) - mask_true = np.zeros((img_h, img_w)) - nxf = img_w / float(width_mid) - nyf = img_h / float(height_mid) - - if nxf > int(nxf): - nxf = int(nxf) + 1 - else: - nxf = int(nxf) - - if nyf > int(nyf): - nyf = int(nyf) + 1 - else: - nyf = int(nyf) - - for i in range(nxf): - for j in range(nyf): - - if i == 0: - index_x_d = i * width_mid - index_x_u = index_x_d + width # (i+1)*width - elif i > 0: - index_x_d = i * width_mid - index_x_u = index_x_d + width # (i+1)*width - - if j == 0: - index_y_d = j * height_mid - index_y_u = index_y_d + height # (j+1)*height - elif j > 0: - index_y_d = j * height_mid - index_y_u = index_y_d + height # (j+1)*height - - if index_x_u > img_w: - index_x_u = img_w - index_x_d = img_w - width - if index_y_u > img_h: - index_y_u = img_h - index_y_d = img_h - height - - - - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - - label_p_pred = model_textline.predict( - img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) - - seg = np.argmax(label_p_pred, axis=3)[0] - - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - - if i==0 and j==0: - seg_color = seg_color[0:seg_color.shape[0] - offset, 0:seg_color.shape[1] - offset, :] - seg = seg[0:seg.shape[0] - offset, 0:seg.shape[1] - offset] - - mask_true[index_y_d + 0:index_y_u - offset, index_x_d + 0:index_x_u - offset] = seg - prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + 0:index_x_u - offset, - :] = seg_color - - elif i==nxf-1 and j==nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - 0, 
offset:seg_color.shape[1] - 0, :] - seg = seg[offset:seg.shape[0] - 0, offset:seg.shape[1] - 0] - - mask_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - 0] = seg - prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - 0, - :] = seg_color - - elif i==0 and j==nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - 0, 0:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - 0, 0:seg.shape[1] - offset] - - mask_true[index_y_d + offset:index_y_u - 0, index_x_d + 0:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + 0:index_x_u - offset, - :] = seg_color - - elif i==nxf-1 and j==0: - seg_color = seg_color[0:seg_color.shape[0] - offset, offset:seg_color.shape[1] - 0, :] - seg = seg[0:seg.shape[0] - offset, offset:seg.shape[1] - 0] - - mask_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - 0] = seg - prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - 0, - :] = seg_color - - elif i==0 and j!=0 and j!=nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - offset, 0:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - offset, 0:seg.shape[1] - offset] - - mask_true[index_y_d + offset:index_y_u - offset, index_x_d + 0:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + 0:index_x_u - offset, - :] = seg_color - - elif i==nxf-1 and j!=0 and j!=nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - 0, :] - seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - 0] - - mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - 0] = seg - prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - 0, - :] = seg_color - - elif i!=0 and i!=nxf-1 and j==0: - seg_color = seg_color[0:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] - seg = seg[0:seg.shape[0] - offset, offset:seg.shape[1] - offset] - - mask_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg - prediction_true[index_y_d + 0:index_y_u - offset, index_x_d + offset:index_x_u - offset, - :] = seg_color - - elif i!=0 and i!=nxf-1 and j==nyf-1: - seg_color = seg_color[offset:seg_color.shape[0] - 0, offset:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - 0, offset:seg.shape[1] - offset] - - mask_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - 0, index_x_d + offset:index_x_u - offset, - :] = seg_color - - else: - seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] - seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - offset] - - mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg - prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset, - :] = seg_color + img = self.otsu_copy(img) + img = img.astype(np.uint8) + + prediction_textline=self.do_prediction(patches,img,model_textline) - y_predi = mask_true.astype(np.uint8) session_textline.close() del model_textline del session_textline gc.collect() - return y_predi + return prediction_textline[:,:,0] def get_textlines_for_each_textregions(self, textline_mask_tot, boxes): textline_mask_tot = cv2.erode(textline_mask_tot, self.kernel, iterations=1) @@ -1180,7 +795,7 @@ class textlineerkenner: return peaks, textline_boxes_rot - def 
ruturn_rotated_contours(self,slope,img_patch): + def return_rotated_contours(self,slope,img_patch): dst = self.rotate_image(img_patch, slope) dst = dst.astype(np.uint8) dst = dst[:, :, 0] @@ -1188,7 +803,6 @@ class textlineerkenner: imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) - thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel) thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel) contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) @@ -1198,14 +812,12 @@ class textlineerkenner: textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 - textline_mask = textline_mask.astype(np.uint8) kernel = np.ones((5, 5), np.uint8) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel) textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel) - textline_mask = cv2.erode(textline_mask, kernel, iterations=1) + textline_mask = cv2.erode(textline_mask, kernel, iterations=2) - try: dst = self.rotate_image(textline_mask, slope) @@ -1310,7 +922,7 @@ class textlineerkenner: imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - return contours + return contours,hierachy def find_contours_mean_y_diff(self,contours_main): M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] @@ -1441,22 +1053,22 @@ class textlineerkenner: return ang_int - def get_slopes_for_each_text_region(self, contours,textline_mask_tot): - - slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des) - - self.slopes = [] - for mv in range(len(self.boxes)): + + def do_work_of_slopes(self,q,poly,box_sub,boxes_per_process,textline_mask_tot,contours_per_process): + slope_biggest=0 + slopes_sub = [] + boxes_sub_new=[] + poly_sub=[] + for mv in range(len(boxes_per_process)): - crop_img, _ = self.crop_image_inside_box(self.boxes[mv], + crop_img, _ = self.crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) crop_img=crop_img[:,:,0] - crop_img=cv2.erode(crop_img,self.kernel,iterations = 1) + crop_img=cv2.erode(crop_img,self.kernel,iterations = 2) try: - hierachy=None - textline_con=self.return_contours_of_image(crop_img) + textline_con,hierachy=self.return_contours_of_image(crop_img) textline_con_fil=self.filter_contours_area_of_image(crop_img,textline_con,hierachy,max_area=1,min_area=0.0008) y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) @@ -1477,9 +1089,59 @@ class textlineerkenner: slope_corresponding_textregion=slope_biggest elif slope_corresponding_textregion==999: slope_corresponding_textregion=slope_biggest - self.slopes.append(slope_corresponding_textregion) - + slopes_sub.append(slope_corresponding_textregion) + + cnt_clean_rot = self.textline_contours_postprocessing(crop_img + , slope_corresponding_textregion, + contours_per_process[mv], boxes_per_process[mv]) + + poly_sub.append(cnt_clean_rot) + boxes_sub_new.append(boxes_per_process[mv] ) + + + q.put(slopes_sub) + poly.put(poly_sub) + box_sub.put(boxes_sub_new ) + + def get_slopes_and_deskew(self, contours,textline_mask_tot): + slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des) + + num_cores = cpu_count() + q = Queue() + poly=Queue() + box_sub=Queue() + + processes = [] + nh=np.linspace(0, len(self.boxes), num_cores+1) + + + for i in range(num_cores): + boxes_per_process=self.boxes[int(nh[i]):int(nh[i+1])] + 
contours_per_process=contours[int(nh[i]):int(nh[i+1])] + processes.append(Process(target=self.do_work_of_slopes, args=(q,poly,box_sub, boxes_per_process, textline_mask_tot, contours_per_process))) + + for i in range(num_cores): + processes[i].start() + + self.slopes = [] + self.all_found_texline_polygons=[] + self.boxes=[] + + for i in range(num_cores): + slopes_for_sub_process=q.get(True) + boxes_for_sub_process=box_sub.get(True) + polys_for_sub_process=poly.get(True) + + for j in range(len(slopes_for_sub_process)): + self.slopes.append(slopes_for_sub_process[j]) + self.all_found_texline_polygons.append(polys_for_sub_process[j]) + self.boxes.append(boxes_for_sub_process[j]) + + for i in range(num_cores): + processes[i].join() + + def order_of_regions(self, textline_mask,contours_main): mada_n=textline_mask.sum(axis=1) y=mada_n[:] @@ -1542,24 +1204,16 @@ class textlineerkenner: matrix_of_orders[len_main:,1]=2 matrix_of_orders[:len_main,2]=cx_main - - matrix_of_orders[:len_main,3]=cy_main - - matrix_of_orders[:len_main,4]=np.array( range( len_main ) ) - peaks_neg_new=[] - peaks_neg_new.append(0) for iii in range(len(peaks_neg)): peaks_neg_new.append(peaks_neg[iii]) - peaks_neg_new.append(textline_mask.shape[0]) - final_indexers_sorted=[] for i in range(len(peaks_neg_new)-1): top=peaks_neg_new[i] @@ -1575,8 +1229,6 @@ class textlineerkenner: for j in range(len(ind_in_int)): final_indexers_sorted.append(int(ind_in_int[j]) ) - - return final_indexers_sorted, matrix_of_orders @@ -1595,22 +1247,6 @@ class textlineerkenner: order_of_texts return order_of_texts, id_of_texts - def deskew_textline_patches(self, contours,textline_mask_tot): - self.all_text_region_processed = [] - self.all_found_texline_polygons = [] - - - for jj in range(len(self.boxes)): - - crop_img, _ = self.crop_image_inside_box(self.boxes[jj], - np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) - - cnt_clean_rot = self.textline_contours_postprocessing(crop_img[:,:,0] - , self.slopes[jj], - contours[jj], self.boxes[jj]) - - - self.all_found_texline_polygons.append(cnt_clean_rot) def write_into_page_xml(self,contours,page_coord,dir_of_image,order_of_texts , id_of_texts): @@ -1791,6 +1427,7 @@ class textlineerkenner: # extract text regions and corresponding contours and surrounding box text_regions=self.extract_text_regions(image_page) + contours=self.get_text_region_contours_and_boxes(text_regions) @@ -1804,13 +1441,11 @@ class textlineerkenner: if len(contours)>0: - self.get_all_image_patches_coordination(image_page) - - ########## - gc.collect() + # extracting textlines using segmentation textline_mask_tot=self.textline_contours(image_page) + #print(textline_mask_tot) #plt.imshow(textline_mask_tot) #plt.show() ########## @@ -1835,17 +1470,21 @@ class textlineerkenner: # calculate the slope for deskewing for each box of text region. - self.get_slopes_for_each_text_region(contours,textline_mask_tot) + self.get_slopes_and_deskew(contours,textline_mask_tot) ########## gc.collect() + t6=time.time() # do deskewing for each box of text region. 
- self.deskew_textline_patches(contours,textline_mask_tot) + ###self.deskew_textline_patches(contours,textline_mask_tot) + + self.get_all_image_patches_coordination(image_page) + ########## ########## gc.collect() From fbd21cdb814c0e13ac592b6420300d1cc93e4d98 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 19 Nov 2019 10:59:41 +0100 Subject: [PATCH 32/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Do=20not=20create=20empty/space-only=20TextEquivs=20(again)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 54953f5..e6d9f46 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1364,12 +1364,6 @@ class textlineerkenner: coord = ET.SubElement(textline, 'Coords') - - texteq=ET.SubElement(textline, 'TextEquiv') - - uni=ET.SubElement(texteq, 'Unicode') - uni.text = ' ' - #points = ET.SubElement(coord, 'Points') points_co='' From bf41a29e7b4ca13a787f2006ea179ebec01ef1e0 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 19 Nov 2019 11:05:18 +0100 Subject: [PATCH 33/47] =?UTF-8?q?=F0=9F=90=9B=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Do=20not=20hardcode=20Created/LastChange=20elements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index e6d9f46..6f30785 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -26,6 +26,8 @@ import warnings import click import time from multiprocessing import Process, Queue, cpu_count +import datetime + with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -1269,11 +1271,9 @@ class textlineerkenner: created=ET.SubElement(metadata, 'Created') - created.text = '2019-06-17T18:15:12' - + created.text = datetime.datetime.now().isoformat() changetime=ET.SubElement(metadata, 'LastChange') - changetime.text = '2019-06-17T18:15:12' - + changetime.text = datetime.datetime.now().isoformat() page=ET.SubElement(data,'Page') From 4fb3e70ef60e9d42d9a557e7c6172badb14ebf50 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 19 Nov 2019 11:08:41 +0100 Subject: [PATCH 34/47] =?UTF-8?q?=F0=9F=A7=B9=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Do=20not=20create=20empty/space-only=20TextEquivs=20(again)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/main.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 6f30785..8003646 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1391,16 +1391,9 @@ class textlineerkenner: points_co=points_co+' ' #print(points_co) coord.set('points',points_co) - - texteqreg=ET.SubElement(textregion, 'TextEquiv') - - unireg=ET.SubElement(texteqreg, 'Unicode') - unireg.text = ' ' - - tree = ET.ElementTree(data) tree.write(os.path.join(self.dir_out, self.f_name) + ".xml") From 4aed06a325bf7d172612198ae1b5fa00ea723b0d Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Tue, 19 Nov 2019 15:08:53 +0100 Subject: [PATCH 35/47] 
=?UTF-8?q?=E2=9C=A8=20sbb=5Ftextline=5Fdetection:?= =?UTF-8?q?=20Preserve=20input=20PAGE=20info=20by=20merging=20segmentation?= =?UTF-8?q?=20results?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ocrd_sbb_textline_detection used the output XML by main.py as is, and – by doing this – threw away any input data from the input PAGE, including the critical pc:AlternativeImage and the less important pc:MetadataItem. Fix this by merging the segmentation results into a file created from the input file. Also add a pc:MetadataItem processingStep about the segmentation operation. --- qurator/sbb_textline_detector/ocrd_cli.py | 49 ++++++++++++++++++++--- requirements.txt | 3 +- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py index 2a98104..728fafb 100644 --- a/qurator/sbb_textline_detector/ocrd_cli.py +++ b/qurator/sbb_textline_detector/ocrd_cli.py @@ -1,11 +1,14 @@ import json import os +import tempfile import click +import ocrd_models.ocrd_page from ocrd import Processor from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor from ocrd_modelfactory import page_from_file from ocrd_models import OcrdFile +from ocrd_models.ocrd_page_generateds import MetadataItemType, LabelsType, LabelType from ocrd_utils import concat_padded, getLogger, MIMETYPE_PAGE from pkg_resources import resource_string @@ -22,10 +25,14 @@ def ocrd_sbb_textline_detector(*args, **kwargs): return ocrd_cli_wrap_processor(OcrdSbbTextlineDetectorRecognize, *args, **kwargs) +TOOL = 'ocrd_sbb_textline_detector' + + class OcrdSbbTextlineDetectorRecognize(Processor): def __init__(self, *args, **kwargs): - kwargs['ocrd_tool'] = OCRD_TOOL['tools']['ocrd_sbb_textline_detector'] + kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] + kwargs['version'] = OCRD_TOOL['version'] super(OcrdSbbTextlineDetectorRecognize, self).__init__(*args, **kwargs) def _make_file_id(self, input_file, input_file_grp, n): @@ -49,7 +56,6 @@ class OcrdSbbTextlineDetectorRecognize(Processor): log.info("INPUT FILE %i / %s", n, input_file) file_id = self._make_file_id(input_file, self.output_file_grp, n) - image_file = self._resolve_image_file(input_file) # Process the files try: @@ -57,16 +63,47 @@ class OcrdSbbTextlineDetectorRecognize(Processor): except FileExistsError: pass - model = self.parameter['model'] - x = textlineerkenner(image_file, self.output_file_grp, file_id, model) - x.run() + with tempfile.TemporaryDirectory() as tmp_dirname: + # Segment the image + image_file = self._resolve_image_file(input_file) + model = self.parameter['model'] + x = textlineerkenner(image_file, tmp_dirname, file_id, model) + x.run() + + # Read segmentation results + tmp_filename = os.path.join(tmp_dirname, file_id) + '.xml' + tmp_pcgts = ocrd_models.ocrd_page.parse(tmp_filename) + tmp_page = tmp_pcgts.get_Page() + + # Create a new PAGE file from the input file + pcgts = page_from_file(self.workspace.download_file(input_file)) + page = pcgts.get_Page() + + # Merge results → PAGE file + page.set_PrintSpace(tmp_page.get_PrintSpace()) + page.set_ReadingOrder(tmp_page.get_ReadingOrder()) + page.set_TextRegion(tmp_page.get_TextRegion()) + + # Save metadata about this operation + metadata = pcgts.get_Metadata() + metadata.add_MetadataItem( + MetadataItemType(type_="processingStep", + name=self.ocrd_tool['steps'][0], + value=TOOL, + Labels=[LabelsType( + externalModel="ocrd-tool", + externalId="parameters", + 
Label=[LabelType(type_=name, value=self.parameter[name]) + for name in self.parameter.keys()])])) self.workspace.add_file( ID=file_id, file_grp=self.output_file_grp, pageId=page_id, mimetype='application/vnd.prima.page+xml', - local_filename=os.path.join(self.output_file_grp, file_id) + '.xml') + local_filename=os.path.join(self.output_file_grp, file_id) + '.xml', + content=ocrd_models.ocrd_page.to_xml(pcgts) + ) if __name__ == '__main__': diff --git a/requirements.txt b/requirements.txt index 3bcc5bc..42de57a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,4 @@ scikit-learn tensorflow-gpu < 2.0 scipy click -ocrd >= 1.0.0b19 - +ocrd >= 2.0.0 From 8fa7179560f9dd019ffdb9c8976d486500d02340 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 20 Nov 2019 09:50:29 +0100 Subject: [PATCH 36/47] =?UTF-8?q?=F0=9F=90=9B=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Disable=20multiprocessing=20to=20fix=20race=20condition?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lines were sorted in the wrong regions. Work around this by disabling multiprocessing until a proper fix is done. --- qurator/sbb_textline_detector/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 8003646..fc62018 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1109,7 +1109,7 @@ class textlineerkenner: slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des) - num_cores = cpu_count() + num_cores = 1 # XXX cpu_count() q = Queue() poly=Queue() box_sub=Queue() From 0182b7087ff2544e318b4caf369a6065f956f2f8 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Wed, 20 Nov 2019 14:05:15 +0100 Subject: [PATCH 37/47] remove multiprocessing bug --- qurator/sbb_textline_detector/main.py | 43 +++++++++++++-------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index fc62018..44399f1 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1056,14 +1056,15 @@ class textlineerkenner: return ang_int - def do_work_of_slopes(self,q,poly,box_sub,boxes_per_process,textline_mask_tot,contours_per_process): + def do_work_of_slopes(self,q,poly,box_sub,boxes_per_process,contours_sub,textline_mask_tot,contours_per_process): slope_biggest=0 slopes_sub = [] boxes_sub_new=[] poly_sub=[] + contours_sub_per_p=[] for mv in range(len(boxes_per_process)): - + contours_sub_per_p.append(contours_per_process[mv]) crop_img, _ = self.crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) crop_img=crop_img[:,:,0] @@ -1099,20 +1100,23 @@ class textlineerkenner: poly_sub.append(cnt_clean_rot) boxes_sub_new.append(boxes_per_process[mv] ) + q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new ) + contours_sub.put(contours_sub_per_p) def get_slopes_and_deskew(self, contours,textline_mask_tot): slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des) - num_cores = 1 # XXX cpu_count() + num_cores = cpu_count() q = Queue() poly=Queue() box_sub=Queue() + contours_sub=Queue() processes = [] nh=np.linspace(0, len(self.boxes), num_cores+1) @@ -1121,28 +1125,33 @@ class textlineerkenner: for i in range(num_cores): boxes_per_process=self.boxes[int(nh[i]):int(nh[i+1])] contours_per_process=contours[int(nh[i]):int(nh[i+1])] - 
processes.append(Process(target=self.do_work_of_slopes, args=(q,poly,box_sub, boxes_per_process, textline_mask_tot, contours_per_process))) + processes.append(Process(target=self.do_work_of_slopes, args=(q,poly,box_sub, boxes_per_process, contours_sub, textline_mask_tot, contours_per_process))) for i in range(num_cores): processes[i].start() self.slopes = [] self.all_found_texline_polygons=[] + all_found_text_regions=[] self.boxes=[] for i in range(num_cores): slopes_for_sub_process=q.get(True) boxes_for_sub_process=box_sub.get(True) polys_for_sub_process=poly.get(True) + contours_for_subprocess=contours_sub.get(True) for j in range(len(slopes_for_sub_process)): self.slopes.append(slopes_for_sub_process[j]) self.all_found_texline_polygons.append(polys_for_sub_process[j]) self.boxes.append(boxes_for_sub_process[j]) + all_found_text_regions.append(contours_for_subprocess[j]) for i in range(num_cores): processes[i].join() + return all_found_text_regions + def order_of_regions(self, textline_mask,contours_main): mada_n=textline_mask.sum(axis=1) @@ -1441,33 +1450,23 @@ class textlineerkenner: t4=time.time() - # get orders of each textregion. This method by now only works for one column documents. - indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours) - order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted ) - ########## - gc.collect() + # calculate the slope for deskewing for each box of text region. + contours=self.get_slopes_and_deskew(contours,textline_mask_tot) + gc.collect() t5=time.time() - - # just get the textline result for each box of text regions - #self.get_textlines_for_each_textregions(textline_mask_tot) - ########## - - # calculate the slope for deskewing for each box of text region. - self.get_slopes_and_deskew(contours,textline_mask_tot) + # get orders of each textregion. This method by now only works for one column documents. + indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours) + order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted ) ########## gc.collect() - - t6=time.time() - # do deskewing for each box of text region. 
- ###self.deskew_textline_patches(contours,textline_mask_tot) self.get_all_image_patches_coordination(image_page) @@ -1490,8 +1489,8 @@ class textlineerkenner: print( "time needed for page extraction = "+"{0:.2f}".format(t2-t1) ) print( "time needed for text region extraction and get contours = "+"{0:.2f}".format(t3-t2) ) print( "time needed for textlines = "+"{0:.2f}".format(t4-t3) ) - print( "time needed to get order of regions = "+"{0:.2f}".format(t5-t4) ) - print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t6-t5) ) + print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t5-t4) ) + print( "time needed to get order of regions = "+"{0:.2f}".format(t6-t5) ) print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) From a11f6740cb30062c9fb19dc72df768cc5552e73a Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 28 Nov 2019 16:19:44 +0100 Subject: [PATCH 38/47] Update main.py - robust deskewing and better page extraction --- qurator/sbb_textline_detector/main.py | 428 ++++++++++++-------------- 1 file changed, 200 insertions(+), 228 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 44399f1..1f78aaf 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -51,7 +51,7 @@ class textlineerkenner: self.f_name = self.f_name.split('.')[0] self.dir_models = dir_models self.kernel = np.ones((5, 5), np.uint8) - self.model_page_dir = dir_models + '/model_page.h5' + self.model_page_dir = dir_models + '/model_page_new.h5' self.model_region_dir = dir_models + '/model_strukturerkennung.h5' self.model_textline_dir = dir_models + '/model_textline.h5' @@ -199,20 +199,32 @@ class textlineerkenner: self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) elif self.image.shape[0] < 2000 and self.image.shape[0] >= 1000: - self.img_hight_int = 3500 + self.img_hight_int = int(self.image.shape[0]*1.1) self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) - elif self.image.shape[0] < 3000 and self.image.shape[0] >= 2000: - self.img_hight_int = 5500 + elif self.image.shape[0] < 3300 and self.image.shape[0] >= 2000: + self.img_hight_int = int(self.image.shape[0]*1.1) self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) - elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3000: + elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3300 and self.image.shape[1]<2400 : + self.img_hight_int = int(self.image.shape[0]*1.1)# 6500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3300 and self.image.shape[1]>=2400 : self.img_hight_int = 6500 self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) - + + elif self.image.shape[0] < 5400 and self.image.shape[0] > 4000 and self.image.shape[1]>3300 : + self.img_hight_int = int(self.image.shape[0]*1.6)# 6500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + elif self.image.shape[0] < 11000 and self.image.shape[0] >= 7000 : + self.img_hight_int = int(self.image.shape[0]*1.6)# 6500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) else: - self.img_hight_int = self.image.shape[0] - self.img_width_int = self.image.shape[1] + 
self.img_hight_int = int(self.image.shape[0]*1.1)# 6500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + #self.img_hight_int = self.image.shape[0] + #self.img_width_int = self.image.shape[1] self.scale_y = self.img_hight_int / float(self.image.shape[0]) self.scale_x = self.img_width_int / float(self.image.shape[1]) @@ -391,8 +403,8 @@ class textlineerkenner: patches=False model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = self.otsu_copy(self.image) - for ii in range(1): - img = cv2.GaussianBlur(img, (15, 15), 0) + #for ii in range(1): + # img = cv2.GaussianBlur(img, (15, 15), 0) img_page_prediction=self.do_prediction(patches,img,model_page) @@ -400,7 +412,7 @@ class textlineerkenner: imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) - thresh = cv2.dilate(thresh, self.kernel, iterations=3) + thresh = cv2.dilate(thresh, self.kernel, iterations=6) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) @@ -408,16 +420,28 @@ class textlineerkenner: cnt = contours[np.argmax(cnt_size)] x, y, w, h = cv2.boundingRect(cnt) + + try: + box = [x, y, w, h] + + croped_page, page_coord = self.crop_image_inside_box(box, self.image) + - box = [x, y, w, h] + self.cont_page=[] + self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , + [ page_coord[3] , page_coord[0] ] , + [ page_coord[3] , page_coord[1] ] , + [ page_coord[2] , page_coord[1] ]] ) ) + except: + box = [0, 0, self.image.shape[1]-1, self.image.shape[0]-1] + croped_page, page_coord = self.crop_image_inside_box(box, self.image) + - croped_page, page_coord = self.crop_image_inside_box(box, self.image) - - self.cont_page=[] - self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , - [ page_coord[3] , page_coord[0] ] , - [ page_coord[3] , page_coord[1] ] , - [ page_coord[2] , page_coord[1] ]] ) ) + self.cont_page=[] + self.cont_page.append( np.array( [ [ page_coord[2] , page_coord[0] ] , + [ page_coord[3] , page_coord[0] ] , + [ page_coord[3] , page_coord[1] ] , + [ page_coord[2] , page_coord[1] ]] ) ) session_page.close() del model_page @@ -437,8 +461,10 @@ class textlineerkenner: img = self.otsu_copy(img) img = img.astype(np.uint8) + prediction_regions=self.do_prediction(patches,img,model_region) + session_region.close() del model_region del session_region @@ -505,8 +531,8 @@ class textlineerkenner: self.all_text_region_raw.append(crop_img[:, :, 0]) self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1]) - def seperate_lines(self, img_path, contour_text_interest, thetha): - (h, w) = img_path.shape[:2] + def seperate_lines(self, img_patch, contour_text_interest, thetha): + (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, -thetha, 1.0) x_d = M[0, 2] @@ -522,19 +548,19 @@ class textlineerkenner: y_cont = y_cont - np.min(y_cont) x_min_cont = 0 - x_max_cont = img_path.shape[1] + x_max_cont = img_patch.shape[1] y_min_cont = 0 - y_max_cont = img_path.shape[0] + y_max_cont = img_patch.shape[0] xv = np.linspace(x_min_cont, x_max_cont, 1000) - mada_n = img_path.sum(axis=1) + textline_patch_sum_along_width = img_patch.sum(axis=1) first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) - y = mada_n[:] # [first_nonzero:last_nonzero] - y_help = np.zeros(len(y) + 40) - y_help[20:len(y) + 20] = y + y = 
textline_patch_sum_along_width[:] # [first_nonzero:last_nonzero] + y_padded = np.zeros(len(y) + 40) + y_padded[20:len(y) + 20] = y x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) @@ -544,14 +570,20 @@ class textlineerkenner: sigma_gaus=8 - z= gaussian_filter1d(y_help, sigma_gaus) - zneg_rev=-y_help+np.max(y_help) - zneg=np.zeros(len(zneg_rev)+40) - zneg[20:len(zneg_rev)+20]=zneg_rev - zneg= gaussian_filter1d(zneg, sigma_gaus) + y_padded_smoothed= gaussian_filter1d(y_padded, sigma_gaus) + y_padded_up_to_down=-y_padded+np.max(y_padded) + y_padded_up_to_down_padded=np.zeros(len(y_padded_up_to_down)+40) + y_padded_up_to_down_padded[20:len(y_padded_up_to_down)+20]=y_padded_up_to_down + y_padded_up_to_down_padded= gaussian_filter1d(y_padded_up_to_down_padded, sigma_gaus) + - peaks, _ = find_peaks(z, height=0) - peaks_neg, _ = find_peaks(zneg, height=0) + peaks, _ = find_peaks(y_padded_smoothed, height=0) + peaks_neg, _ = find_peaks(y_padded_up_to_down_padded, height=0) + + mean_value_of_peaks=np.mean(y_padded_smoothed[peaks]) + std_value_of_peaks=np.std(y_padded_smoothed[peaks]) + peaks_values=y_padded_smoothed[peaks] + peaks_neg = peaks_neg - 20 - 20 peaks = peaks - 20 @@ -568,21 +600,40 @@ class textlineerkenner: textline_boxes_rot = [] if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3: + #print('11') for jj in range(len(peaks)): - dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) - dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) + + if jj==(len(peaks)-1): + dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) + dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) + + if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: + point_up = peaks[jj] + first_nonzero - int(1.3 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.3 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + else: + point_up = peaks[jj] + first_nonzero - int(1.4 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down =y_max_cont-1##peaks[jj] + first_nonzero + int(1.6 * dis_to_next_down) #point_up# np.max(y_cont)#peaks[jj] + first_nonzero + int(1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) - point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) - point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + point_down_narrow = peaks[jj] + first_nonzero + int( + 1.4 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + else: + dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) + dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) + + if peaks_values[jj]>mean_value_of_peaks-std_value_of_peaks/2.: + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + else: + point_up = peaks[jj] + first_nonzero - int(1.23 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + point_down = peaks[jj] + first_nonzero + int(1.33 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) - point_down_narrow = peaks[jj] + first_nonzero + int( - 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + point_down_narrow = peaks[jj] + first_nonzero + int( + 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) - if point_down >= img_path.shape[0]: - point_down = img_path.shape[0] - 2 - if point_down_narrow >= img_path.shape[0]: - 
point_down_narrow = img_path.shape[0] - 2 + + if point_down_narrow >= img_patch.shape[0]: + point_down_narrow = img_patch.shape[0] - 2 distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) for mj in range(len(xv))] @@ -672,15 +723,15 @@ class textlineerkenner: dis_to_next = np.abs(peaks[1] - peaks[0]) for jj in range(len(peaks)): if jj == 0: - point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + point_up = 0#peaks[jj] + first_nonzero - int(1. / 1.7 * dis_to_next) if point_up < 0: point_up = 1 - point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next) elif jj == 1: - point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) - if point_down >= img_path.shape[0]: - point_down = img_path.shape[0] - 2 - point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + point_down = peaks[jj] + first_nonzero + int(1. / 1.8 * dis_to_next) + if point_down >= img_patch.shape[0]: + point_down = img_patch.shape[0] - 2 + point_up = peaks[jj] + first_nonzero - int(1. / 1.8 * dis_to_next) distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) for mj in range(len(xv))] @@ -692,8 +743,8 @@ class textlineerkenner: x_min = x_min_cont x_max = x_max_cont else: - x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) - x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) + x_min = np.min(xvinside) + x_max = np.max(xvinside) p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) @@ -737,9 +788,9 @@ class textlineerkenner: elif jj == len(peaks) - 1: dis_to_next = peaks[jj] - peaks[jj - 1] # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) - point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) - if point_down >= img_path.shape[0]: - point_down = img_path.shape[0] - 2 + point_down = peaks[jj] + first_nonzero + int(1. / 1.7 * dis_to_next) + if point_down >= img_patch.shape[0]: + point_down = img_patch.shape[0] - 2 # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) point_up = peaks[jj] + first_nonzero - int(1. 
/ 1.9 * dis_to_next) else: @@ -858,65 +909,7 @@ class textlineerkenner: return contours_rotated_clean - def textline_contours_to_get_slope_correctly(self, textline_mask, img_patch, contour_interest): - - slope_new = 0 # deskew_images(img_patch) - - textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 - - textline_mask = textline_mask.astype(np.uint8) - textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, self.kernel) - textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, self.kernel) - textline_mask = cv2.erode(textline_mask, self.kernel, iterations=1) - imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY) - _, thresh = cv2.threshold(imgray, 0, 255, 0) - - thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, self.kernel) - thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, self.kernel) - - contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003) - - textline_maskt = textline_mask[:, :, 0] - textline_maskt[textline_maskt != 0] = 1 - - peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new) - - mean_dis = np.mean(np.diff(peaks_point)) - - len_x = thresh.shape[1] - - slope_lines = [] - contours_slope_new = [] - for kk in range(len(main_contours)): - - xminh = np.min(main_contours[kk][:, 0]) - xmaxh = np.max(main_contours[kk][:, 0]) - yminh = np.min(main_contours[kk][:, 1]) - ymaxh = np.max(main_contours[kk][:, 1]) - - - if ymaxh - yminh <= mean_dis and ( - xmaxh - xminh) >= 0.3 * len_x: # xminh>=0.05*len_x and xminh<=0.4*len_x and xmaxh<=0.95*len_x and xmaxh>=0.6*len_x: - contours_slope_new.append(main_contours[kk]) - - rows, cols = thresh.shape[:2] - [vx, vy, x, y] = cv2.fitLine(main_contours[kk], cv2.DIST_L2, 0, 0.01, 0.01) - - slope_lines.append((vy / vx) / np.pi * 180) - - if len(slope_lines) >= 2: - - slope = np.mean(slope_lines) # slope_true/np.pi*180 - else: - slope = 999 - - else: - slope = 0 - - return slope def return_contours_of_image(self,image_box_tabels_1): image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) @@ -935,18 +928,18 @@ class textlineerkenner: def isNaN(self,num): return num != num - def find_num_col(self,regions_without_seperators,sigma_,multiplier=3.8 ): - regions_without_seperators_0=regions_without_seperators[:,:].sum(axis=1) + def get_standard_deviation_of_summed_textline_patch_along_width(self,img_patch,sigma_,multiplier=3.8 ): + img_patch_sum_along_width=img_patch[:,:].sum(axis=1) - meda_n_updown=regions_without_seperators_0[len(regions_without_seperators_0)::-1] + img_patch_sum_along_width_updown=img_patch_sum_along_width[len(img_patch_sum_along_width)::-1] - first_nonzero=(next((i for i, x in enumerate(regions_without_seperators_0) if x), 0)) - last_nonzero=(next((i for i, x in enumerate(meda_n_updown) if x), 0)) + first_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width) if x), 0)) + last_nonzero=(next((i for i, x in enumerate(img_patch_sum_along_width_updown) if x), 0)) - last_nonzero=len(regions_without_seperators_0)-last_nonzero + last_nonzero=len(img_patch_sum_along_width)-last_nonzero - y=regions_without_seperators_0#[first_nonzero:last_nonzero] + y=img_patch_sum_along_width#[first_nonzero:last_nonzero] y_help=np.zeros(len(y)+20) @@ -971,152 +964,127 @@ class textlineerkenner: peaks, _ = find_peaks(z, height=0) peaks_neg=peaks_neg-10-10 - - - - last_nonzero=last_nonzero-0#100 - 
first_nonzero=first_nonzero+0#+100 - - peaks_neg=peaks_neg[(peaks_neg>first_nonzero) & (peaks_neg.06*regions_without_seperators.shape[1]) & (peaks<0.94*regions_without_seperators.shape[1])] - interest_pos=z[peaks] interest_pos=interest_pos[interest_pos>10] - - interest_neg=z[peaks_neg] - - - if interest_neg[0]<0.1: - interest_neg=interest_neg[1:] - if interest_neg[len(interest_neg)-1]<0.1: - interest_neg=interest_neg[:len(interest_neg)-1] - + interest_neg=z[peaks_neg] - min_peaks_pos=np.min(interest_pos) + min_peaks_pos=np.mean(interest_pos) min_peaks_neg=0#np.min(interest_neg) - dis_talaei=(min_peaks_pos-min_peaks_neg)/multiplier + #print(interest_pos) grenze=min_peaks_pos-dis_talaei#np.mean(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])-np.std(y[peaks_neg[0]:peaks_neg[len(peaks_neg)-1]])/2.0 - interest_neg_fin=interest_neg#[(interest_neg0]=1 - slope_corresponding_textregion=self.return_deskew_slop(crop_img,sigma_des) - - + sigma_des=2 + slope_corresponding_textregion=self.return_deskew_slope(crop_img,sigma_des) except: slope_corresponding_textregion=999 if np.abs(slope_corresponding_textregion)>12.5 and slope_corresponding_textregion!=999: - slope_corresponding_textregion=slope_biggest + slope_corresponding_textregion=0 elif slope_corresponding_textregion==999: - slope_corresponding_textregion=slope_biggest - slopes_sub.append(slope_corresponding_textregion) + slope_corresponding_textregion=0 + slopes_per_each_subprocess.append(slope_corresponding_textregion) - cnt_clean_rot = self.textline_contours_postprocessing(crop_img + bounding_rectangle_of_textlines = self.textline_contours_postprocessing(crop_img , slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) - poly_sub.append(cnt_clean_rot) - boxes_sub_new.append(boxes_per_process[mv] ) + textlines_rectangles_per_each_subprocess.append(bounding_rectangle_of_textlines) + bounding_box_of_textregion_per_each_subprocess.append(boxes_per_process[mv] ) - q.put(slopes_sub) - poly.put(poly_sub) - box_sub.put(boxes_sub_new ) - contours_sub.put(contours_sub_per_p) + queue_of_slopes_per_textregion.put(slopes_per_each_subprocess) + queue_of_textlines_rectangle_per_textregion.put(textlines_rectangles_per_each_subprocess) + queue_of_textregion_box.put(bounding_box_of_textregion_per_each_subprocess ) + queue_of_quntours_of_textregion.put(contours_textregion_per_each_subprocess) def get_slopes_and_deskew(self, contours,textline_mask_tot): - - slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des) - num_cores = cpu_count() - q = Queue() - poly=Queue() - box_sub=Queue() - contours_sub=Queue() + + queue_of_slopes_per_textregion = Queue() + queue_of_textlines_rectangle_per_textregion=Queue() + queue_of_textregion_box=Queue() + queue_of_quntours_of_textregion=Queue() processes = [] nh=np.linspace(0, len(self.boxes), num_cores+1) @@ -1125,7 +1093,8 @@ class textlineerkenner: for i in range(num_cores): boxes_per_process=self.boxes[int(nh[i]):int(nh[i+1])] contours_per_process=contours[int(nh[i]):int(nh[i+1])] - processes.append(Process(target=self.do_work_of_slopes, args=(q,poly,box_sub, boxes_per_process, contours_sub, textline_mask_tot, contours_per_process))) + processes.append(Process(target=self.do_work_of_slopes, args=(queue_of_slopes_per_textregion,queue_of_textlines_rectangle_per_textregion, + queue_of_textregion_box, boxes_per_process, queue_of_quntours_of_textregion, textline_mask_tot, contours_per_process))) for i in range(num_cores): processes[i].start() @@ -1136,10 +1105,10 @@ class textlineerkenner: self.boxes=[] 
for i in range(num_cores): - slopes_for_sub_process=q.get(True) - boxes_for_sub_process=box_sub.get(True) - polys_for_sub_process=poly.get(True) - contours_for_subprocess=contours_sub.get(True) + slopes_for_sub_process=queue_of_slopes_per_textregion.get(True) + boxes_for_sub_process=queue_of_textregion_box.get(True) + polys_for_sub_process=queue_of_textlines_rectangle_per_textregion.get(True) + contours_for_subprocess=queue_of_quntours_of_textregion.get(True) for j in range(len(slopes_for_sub_process)): self.slopes.append(slopes_for_sub_process[j]) @@ -1154,11 +1123,11 @@ class textlineerkenner: def order_of_regions(self, textline_mask,contours_main): - mada_n=textline_mask.sum(axis=1) - y=mada_n[:] - - y_help=np.zeros(len(y)+40) - y_help[20:len(y)+20]=y + textline_sum_along_width=textline_mask.sum(axis=1) + + y=textline_sum_along_width[:] + y_padded=np.zeros(len(y)+40) + y_padded[20:len(y)+20]=y x=np.array( range(len(y)) ) @@ -1167,8 +1136,8 @@ class textlineerkenner: sigma_gaus=8 - z= gaussian_filter1d(y_help, sigma_gaus) - zneg_rev=-y_help+np.max(y_help) + z= gaussian_filter1d(y_padded, sigma_gaus) + zneg_rev=-y_padded+np.max(y_padded) zneg=np.zeros(len(zneg_rev)+40) zneg[20:len(zneg_rev)+20]=zneg_rev @@ -1423,6 +1392,12 @@ class textlineerkenner: # extract text regions and corresponding contours and surrounding box text_regions=self.extract_text_regions(image_page) + + text_regions = cv2.erode(text_regions, self.kernel, iterations=3) + text_regions = cv2.dilate(text_regions, self.kernel, iterations=4) + + #plt.imshow(text_regions[:,:,0]) + #plt.show() contours=self.get_text_region_contours_and_boxes(text_regions) @@ -1441,9 +1416,6 @@ class textlineerkenner: # extracting textlines using segmentation textline_mask_tot=self.textline_contours(image_page) - #print(textline_mask_tot) - #plt.imshow(textline_mask_tot) - #plt.show() ########## K.clear_session() gc.collect() @@ -1493,7 +1465,6 @@ class textlineerkenner: print( "time needed to get order of regions = "+"{0:.2f}".format(t6-t5) ) print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) - @click.command() @@ -1509,3 +1480,4 @@ def main(image, out, model): if __name__ == "__main__": main() + From 2112bb18c67d96feda61486d3d5cadab4ba2a9d5 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Fri, 29 Nov 2019 11:29:12 +0100 Subject: [PATCH 39/47] fixed the bug: local variable 't4' referenced before assignment --- qurator/sbb_textline_detector/main.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py index 1f78aaf..e70e475 100644 --- a/qurator/sbb_textline_detector/main.py +++ b/qurator/sbb_textline_detector/main.py @@ -1460,10 +1460,11 @@ class textlineerkenner: print( "time total = "+"{0:.2f}".format(time.time()-t1) ) print( "time needed for page extraction = "+"{0:.2f}".format(t2-t1) ) print( "time needed for text region extraction and get contours = "+"{0:.2f}".format(t3-t2) ) - print( "time needed for textlines = "+"{0:.2f}".format(t4-t3) ) - print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t5-t4) ) - print( "time needed to get order of regions = "+"{0:.2f}".format(t6-t5) ) - print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) + if len(contours)>0: + print( "time needed for textlines = "+"{0:.2f}".format(t4-t3) ) + print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t5-t4) ) + print( "time needed to get order of regions = 
"+"{0:.2f}".format(t6-t5) ) + print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) ) From af5cbe9052e20673d1862e299e7c8b27a96dea13 Mon Sep 17 00:00:00 2001 From: "Gerber, Mike" Date: Wed, 4 Dec 2019 11:42:45 +0100 Subject: [PATCH 40/47] =?UTF-8?q?=F0=9F=90=9B=20sbb=5Ftextline=5Fdetector:?= =?UTF-8?q?=20Fix=20making=20the=20output=20file=20id?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/sbb_textline_detector/ocrd_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py index 728fafb..d090e46 100644 --- a/qurator/sbb_textline_detector/ocrd_cli.py +++ b/qurator/sbb_textline_detector/ocrd_cli.py @@ -55,7 +55,7 @@ class OcrdSbbTextlineDetectorRecognize(Processor): input_file = self.workspace.mets.find_files(fileGrp=self.input_file_grp, pageId=page_id)[0] log.info("INPUT FILE %i / %s", n, input_file) - file_id = self._make_file_id(input_file, self.output_file_grp, n) + file_id = self._make_file_id(input_file, self.input_file_grp, n) # Process the files try: From b0dc6491c7a3920e4d89d734ff90f332c3fd284b Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 5 Dec 2019 15:46:36 +0100 Subject: [PATCH 41/47] Update README.md --- README.md | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index a0180f1..bcc502a 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,12 @@ # Textline-Recognition *** +# Tool: +This tool does textline detection of image and throw result as xml data. -# Installation: +# Models: +In order to run this tool you need corresponding models. You can find them here +https://file.spk-berlin.de:8443/textline_detection/ -Setup virtual environment: -``` -virtualenv --python=python3.6 venv -``` - -Activate virtual environment: -``` -source venv/bin/activate -``` - -Upgrade pip: -``` -pip install -U pip -``` - -Install package together with its dependencies in development mode: -``` -pip install -e ./ -``` - -*** - -Perform document structure and textline analysis on a -scanned document image and save the result as PAGE XML. - -### Usage -``` -text_line_recognition --help -``` From a836a083c12c01fe7187c7ab06e93cc2b250c8af Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 5 Dec 2019 15:47:02 +0100 Subject: [PATCH 42/47] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bcc502a..7c2aa46 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,9 @@ This tool does textline detection of image and throw result as xml data. # Models: -In order to run this tool you need corresponding models. You can find them here -https://file.spk-berlin.de:8443/textline_detection/ +In order to run this tool you need corresponding models. You can find them here: + + https://file.spk-berlin.de:8443/textline_detection/ From ad4f7acdd8367c0221a1f1a457ad2c8304c8d0c9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 5 Dec 2019 15:47:26 +0100 Subject: [PATCH 43/47] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7c2aa46..a7c8dc4 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This tool does textline detection of image and throw result as xml data. # Models: In order to run this tool you need corresponding models. 

- https://file.spk-berlin.de:8443/textline_detection/
+https://file.spk-berlin.de:8443/textline_detection/

From fb7c605515836349800e58f82b40b6e58cff0c5d Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Thu, 5 Dec 2019 16:06:55 +0100
Subject: [PATCH 44/47] Update README.md

---
 README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index a7c8dc4..f2a9faa 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,16 @@
 # Textline-Recognition

 ***
-# Tool:
+# Tool
 This tool performs textline detection on document images and outputs the result as XML.

-# Models:
+# Models
 In order to run this tool you need the corresponding models. You can find them here:

 https://file.spk-berlin.de:8443/textline_detection/

+# Usage
+
+

From eeff5a0b2dcb9eb198ca543b380e87923ff9027c Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Thu, 5 Dec 2019 16:15:07 +0100
Subject: [PATCH 45/47] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f2a9faa..9dd02ea 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ In order to run this tool you need the corresponding models. You can find them here:

 https://file.spk-berlin.de:8443/textline_detection/

-# Usage
+

From 1013b7ed642ac14bd37686cd4442ab220a93bba6 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Thu, 5 Dec 2019 16:30:09 +0100
Subject: [PATCH 46/47] Update README.md

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index 9dd02ea..8f29e2e 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,14 @@ In order to run this tool you need the corresponding models. You can find them here:

 https://file.spk-berlin.de:8443/textline_detection/

+# Installation
+
+sudo pip install .
+
+# Usage
+
+sbb_textline_detector -i 'image file name here' -o 'directory to write output xml here' -m 'here should be directory of models'
+

From af670b55ac8e5a67ec763809e1f31ca52b20494c Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Thu, 5 Dec 2019 16:31:12 +0100
Subject: [PATCH 47/47] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8f29e2e..d3da05f 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ sudo pip install .

 # Usage

-sbb_textline_detector -i 'image file name here' -o 'directory to write output xml here' -m 'here should be directory of models'
+sbb_textline_detector -i 'image file name' -o 'directory to write output xml' -m 'directory of models'
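
The usage line in the final patch maps onto the Python class that the console script wraps. Below is a minimal sketch of the equivalent call from Python: the module path and the `textlineerkenner` constructor appear in the patches above, while `run()` as the entry point and all file and directory names are assumptions for illustration.

```
# Minimal sketch: calling the detector from Python instead of the CLI.
# Assumes the downloaded models live in ./models and that run() is the
# entry point wrapped by the sbb_textline_detector console script
# (an assumption; all paths here are hypothetical).
from qurator.sbb_textline_detector.main import textlineerkenner

detector = textlineerkenner(
    image_dir='page_0001.tif',  # hypothetical input image file
    dir_out='page_xml/',        # hypothetical directory receiving the XML result
    dir_models='models/',       # hypothetical directory holding the .h5 models
)
detector.run()
```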
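The refactoring hunks near the top of this section (do_work_of_slopes and get_slopes_and_deskew) follow one pattern: split the text-region boxes into one chunk per CPU core, let each worker push its partial results onto shared queues, and drain one batch per worker before joining. The following is a self-contained sketch of that pattern with a toy workload and illustrative names, not the project's code:

```
# Self-contained sketch of the queue-based fan-out used above.
from multiprocessing import Process, Queue, cpu_count

import numpy as np

def do_work(queue_of_results, items_per_process):
    # Stand-in for the per-chunk slope estimation in do_work_of_slopes.
    results = [item * 2 for item in items_per_process]
    queue_of_results.put(results)

if __name__ == "__main__":
    items = list(range(100))                        # stand-in for self.boxes
    num_cores = cpu_count()
    queue_of_results = Queue()
    nh = np.linspace(0, len(items), num_cores + 1)  # chunk boundaries, as in the patch
    processes = [Process(target=do_work,
                         args=(queue_of_results, items[int(nh[i]):int(nh[i + 1])]))
                 for i in range(num_cores)]
    for p in processes:
        p.start()
    collected = []
    for _ in range(num_cores):
        # One blocking get per worker, mirroring queue.get(True) in the patch;
        # draining the queues before join() avoids blocking on a full queue.
        collected.extend(queue_of_results.get(True))
    for p in processes:
        p.join()
    print(len(collected))  # -> 100
```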