commit b85a9dc2561f16b1a8908332f8536eabb7e37564 Author: Gerber, Mike Date: Thu Oct 10 16:13:07 2019 +0200 🧹 sbb_textline_docker: Rename to sbb_textline_detector diff --git a/.gitkeep b/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..020db6f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3 + +ADD main.py / +ADD requirements.txt / + +RUN pip install --proxy=http-proxy.sbb.spk-berlin.de:3128 -r requirements.txt + +ENTRYPOINT ["python", "./main.py"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..a0180f1 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# Textline-Recognition + +*** + +# Installation: + +Setup virtual environment: +``` +virtualenv --python=python3.6 venv +``` + +Activate virtual environment: +``` +source venv/bin/activate +``` + +Upgrade pip: +``` +pip install -U pip +``` + +Install package together with its dependencies in development mode: +``` +pip install -e ./ +``` + +*** + +Perform document structure and textline analysis on a +scanned document image and save the result as PAGE XML. + +### Usage +``` +text_line_recognition --help +``` + + diff --git a/main.py b/main.py new file mode 100644 index 0000000..1701382 --- /dev/null +++ b/main.py @@ -0,0 +1,1333 @@ +#! 
/usr/bin/env python3 + +__version__ = '1.0' + +import os +import sys +import cv2 +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +from sys import getsizeof +import random +from tqdm import tqdm +from keras.models import model_from_json +from keras.models import load_model +import math +from shapely import geometry +from sklearn.cluster import KMeans +import gc +from keras import backend as K +import tensorflow as tf +from scipy.signal import find_peaks +from scipy.ndimage import gaussian_filter1d +import xml.etree.ElementTree as ET +import warnings +import argparse + +with warnings.catch_warnings(): + warnings.simplefilter("ignore") + +__doc__ = \ + """ + tool to extract table form data from alto xml data + """ + + +class textlineerkenner: + def __init__(self, image_dir, dir_out, dir_models): + self.image_dir = image_dir + self.dir_out = dir_out + self.dir_models = dir_models + try: + self.f_name = image_dir.split('/')[len(image_dir.split('/')) - 1] + self.f_name = self.f_name.split('.')[0] + print(self.f_name) + except: + self.f_name = self.f_name.split('.')[0] + self.kernel = np.ones((5, 5), np.uint8) + self.model_page_dir = dir_models + '/model_page.h5' + self.model_region_dir = dir_models + '/model_strukturerkennung.h5' + self.model_textline_dir = dir_models + '/model_textline.h5' + + def find_polugons_size_filter(self, contours, median_area, scaler_up=1.2, scaler_down=0.8): + found_polygons_early = list() + + for c in contours: + if len(c) < 3: # A polygon cannot have less than 3 points + continue + + polygon = geometry.Polygon([point[0] for point in c]) + area = polygon.area + # Check that polygon has area greater than minimal area + if area >= median_area * scaler_down and area <= median_area * scaler_up: + found_polygons_early.append( + np.array([point for point in polygon.exterior.coords], dtype=np.uint)) + return found_polygons_early + + def filter_contours_area_of_image(self, image, contours, hirarchy, max_area, min_area): + 
found_polygons_early = list() + + jv = 0 + for c in contours: + if len(c) < 3: # A polygon cannot have less than 3 points + continue + + polygon = geometry.Polygon([point[0] for point in c]) + area = polygon.area + if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod( + image.shape[:2]): # and hirarchy[0][jv][3]==-1 : + found_polygons_early.append( + np.array([point for point in polygon.exterior.coords], dtype=np.uint)) + jv += 1 + return found_polygons_early + + def filter_contours_area_of_image_interiors(self, image, contours, hirarchy, max_area, min_area): + found_polygons_early = list() + + jv = 0 + for c in contours: + if len(c) < 3: # A polygon cannot have less than 3 points + continue + + polygon = geometry.Polygon([point[0] for point in c]) + area = polygon.area + if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and \ + hirarchy[0][jv][3] != -1: + # print(c[0][0][1]) + found_polygons_early.append( + np.array([point for point in polygon.exterior.coords], dtype=np.uint)) + jv += 1 + return found_polygons_early + + def resize_image(self, img_in, input_height, input_width): + return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + + def resize_ann(self, seg_in, input_height, input_width): + return cv2.resize(seg_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + + def get_one_hot(self, seg, input_height, input_width, n_classes): + seg = seg[:, :, 0] + seg_f = np.zeros((input_height, input_width, n_classes)) + for j in range(n_classes): + seg_f[:, :, j] = (seg == j).astype(int) + return seg_f + + def jaccard_distance_loss(self, y_true, y_pred, smooth=100): + """ + Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) + = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|)) + + The jaccard distance loss is usefull for unbalanced datasets. This has been + shifted so it converges on 0 and is smoothed to avoid exploding or disapearing + gradient. 
+ + Ref: https://en.wikipedia.org/wiki/Jaccard_index + + @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96 + @author: wassname + """ + intersection = K.sum(K.abs(y_true * y_pred), axis=-1) + sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1) + jac = (intersection + smooth) / (sum_ - intersection + smooth) + return (1 - jac) * smooth + + def soft_dice_loss(self, y_true, y_pred, epsilon=1e-6): + ''' + Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions. + Assumes the `channels_last` format. + + # Arguments + y_true: b x X x Y( x Z...) x c One hot encoding of ground truth + y_pred: b x X x Y( x Z...) x c Network output, must sum to 1 over c channel (such as after softmax) + epsilon: Used for numerical stability to avoid divide by zero errors + + # References + V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation + https://arxiv.org/abs/1606.04797 + More details on Dice loss formulation + https://mediatum.ub.tum.de/doc/1395260/1395260.pdf (page 72) + + Adapted from https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022 + ''' + + # skip the batch and class axis for calculating Dice score + axes = tuple(range(1, len(y_pred.shape) - 1)) + + numerator = 2. 
* K.sum(y_pred * y_true, axes) + + denominator = K.sum(K.square(y_pred) + K.square(y_true), axes) + return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch + + def weighted_categorical_crossentropy(self, weights=None): + """ weighted_categorical_crossentropy + + Args: + * weights: crossentropy weights + Returns: + * weighted categorical crossentropy function + """ + + def loss(y_true, y_pred): + labels_floats = tf.cast(y_true, tf.float32) + per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats, logits=y_pred) + + if weights is not None: + weight_mask = tf.maximum(tf.reduce_max(tf.constant( + np.array(weights, dtype=np.float32)[None, None, None]) + * labels_floats, axis=-1), 1.0) + per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] + return tf.reduce_mean(per_pixel_loss) + + return loss + + def seg_metrics(self, y_true, y_pred, metric_name, metric_type='standard', drop_last=True, mean_per_class=False, + verbose=False): + flag_soft = (metric_type == 'soft') + flag_naive_mean = (metric_type == 'naive') + + # always assume one or more classes + num_classes = K.shape(y_true)[-1] + + if not flag_soft: + # get one-hot encoded masks from y_pred (true masks should already be one-hot) + y_pred = K.one_hot(K.argmax(y_pred), num_classes) + y_true = K.one_hot(K.argmax(y_true), num_classes) + + # if already one-hot, could have skipped above command + # keras uses float32 instead of float64, would give error down (but numpy arrays or keras.to_categorical gives float64) + y_true = K.cast(y_true, 'float32') + y_pred = K.cast(y_pred, 'float32') + + # intersection and union shapes are batch_size * n_classes (values = area in pixels) + axes = (1, 2) # W,H axes of each image + intersection = K.sum(K.abs(y_true * y_pred), axis=axes) + mask_sum = K.sum(K.abs(y_true), axis=axes) + K.sum(K.abs(y_pred), axis=axes) + union = mask_sum - intersection # or, np.logical_or(y_pred, y_true) for one-hot + + smooth = .001 + iou = 
(intersection + smooth) / (union + smooth) + dice = 2 * (intersection + smooth) / (mask_sum + smooth) + + metric = {'iou': iou, 'dice': dice}[metric_name] + + # define mask to be 0 when no pixels are present in either y_true or y_pred, 1 otherwise + mask = K.cast(K.not_equal(union, 0), 'float32') + + if drop_last: + metric = metric[:, :-1] + mask = mask[:, :-1] + + if verbose: + print('intersection, union') + print(K.eval(intersection), K.eval(union)) + print(K.eval(intersection / union)) + + # return mean metrics: remaining axes are (batch, classes) + if flag_naive_mean: + return K.mean(metric) + + # take mean only over non-absent classes + class_count = K.sum(mask, axis=0) + non_zero = tf.greater(class_count, 0) + non_zero_sum = tf.boolean_mask(K.sum(metric * mask, axis=0), non_zero) + non_zero_count = tf.boolean_mask(class_count, non_zero) + + if verbose: + print('Counts of inputs with class present, metrics for non-absent classes') + print(K.eval(class_count), K.eval(non_zero_sum / non_zero_count)) + + return K.mean(non_zero_sum / non_zero_count) + + def mean_iou(self, y_true, y_pred, **kwargs): + return self.seg_metrics(y_true, y_pred, metric_name='iou', **kwargs) + + def Mean_IOU(self, y_true, y_pred): + nb_classes = K.int_shape(y_pred)[-1] + iou = [] + true_pixels = K.argmax(y_true, axis=-1) + pred_pixels = K.argmax(y_pred, axis=-1) + void_labels = K.equal(K.sum(y_true, axis=-1), 0) + for i in range(0, nb_classes): # exclude first label (background) and last label (void) + true_labels = K.equal(true_pixels, i) # & ~void_labels + pred_labels = K.equal(pred_pixels, i) # & ~void_labels + inter = tf.to_int32(true_labels & pred_labels) + union = tf.to_int32(true_labels | pred_labels) + legal_batches = K.sum(tf.to_int32(true_labels), axis=1) > 0 + ious = K.sum(inter, axis=1) / K.sum(union, axis=1) + iou.append( + K.mean(tf.gather(ious, indices=tf.where(legal_batches)))) # returns average IoU of the same objects + iou = tf.stack(iou) + legal_labels = 
~tf.debugging.is_nan(iou) + iou = tf.gather(iou, indices=tf.where(legal_labels)) + return K.mean(iou) + + def IoU(self, Yi, y_predi): + ## mean Intersection over Union + ## Mean IoU = TP/(FN + TP + FP) + + IoUs = [] + Nclass = np.unique(Yi) + for c in Nclass: + TP = np.sum((Yi == c) & (y_predi == c)) + FP = np.sum((Yi != c) & (y_predi == c)) + FN = np.sum((Yi == c) & (y_predi != c)) + IoU = TP / float(TP + FP + FN) + print("class {:02.0f}: #TP={:6.0f}, #FP={:6.0f}, #FN={:5.0f}, IoU={:4.3f}".format(c, TP, FP, FN, IoU)) + IoUs.append(IoU) + mIoU = np.mean(IoUs) + print("_________________") + print("Mean IoU: {:4.3f}".format(mIoU)) + return mIoU + + def IoU_case(self, Yi, y_predi, n_classes): + ## mean Intersection over Union + ## Mean IoU = TP/(FN + TP + FP) + + IoUs = [] + + Nclass = n_classes + for c in range(Nclass): + TP = np.sum((Yi == c) & (y_predi == c)) + FP = np.sum((Yi != c) & (y_predi == c)) + FN = np.sum((Yi == c) & (y_predi != c)) + IoUs.append(np.array([TP, FP, FN])) + return IoUs + + def color_images(self, seg, n_classes): + ann_u = range(n_classes) + if len(np.shape(seg)) == 3: + seg = seg[:, :, 0] + + seg_img = np.zeros((np.shape(seg)[0], np.shape(seg)[1], 3)).astype(np.uint8) + colors = sns.color_palette("hls", n_classes) + + for c in ann_u: + c = int(c) + segl = (seg == c) + seg_img[:, :, 0] = segl * c + seg_img[:, :, 1] = segl * c + seg_img[:, :, 2] = segl * c + return seg_img + + def color_images_diva(self, seg, n_classes): + ann_u = range(n_classes) + if len(np.shape(seg)) == 3: + seg = seg[:, :, 0] + + seg_img = np.zeros((np.shape(seg)[0], np.shape(seg)[1], 3)).astype(float) + # colors=sns.color_palette("hls", n_classes) + colors = [[1, 0, 0], [8, 0, 0], [2, 0, 0], [4, 0, 0]] + + for c in ann_u: + c = int(c) + segl = (seg == c) + seg_img[:, :, 0][seg == c] = colors[c][0] # segl*(colors[c][0]) + seg_img[:, :, 1][seg == c] = colors[c][1] # seg_img[:,:,1]=segl*(colors[c][1]) + seg_img[:, :, 2][seg == c] = colors[c][2] # 
seg_img[:,:,2]=segl*(colors[c][2]) + return seg_img + + def rotate_image(self, img_patch, slope): + (h, w) = img_patch.shape[:2] + center = (w // 2, h // 2) + M = cv2.getRotationMatrix2D(center, slope, 1.0) + return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) + + def cleaning_probs(self, probs: np.ndarray, sigma: float) -> np.ndarray: + # Smooth + if sigma > 0.: + return cv2.GaussianBlur(probs, (int(3 * sigma) * 2 + 1, int(3 * sigma) * 2 + 1), sigma) + elif sigma == 0.: + return cv2.fastNlMeansDenoising((probs * 255).astype(np.uint8), h=20) / 255 + else: # Negative sigma, do not do anything + return probs + + def crop_image_inside_box(self, box, img_org_copy): + image_box = img_org_copy[box[1]:box[1] + box[3], box[0]:box[0] + box[2]] + return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] + + def otsu_copy(self, img): + img_r = np.zeros(img.shape) + img1 = img[:, :, 0] + img2 = img[:, :, 1] + img3 = img[:, :, 2] + # print(img.min()) + # print(img[:,:,0].min()) + # blur = cv2.GaussianBlur(img,(5,5)) + # ret3,th3 = cv2.threshold(blur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU) + retval1, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + retval2, threshold2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + retval3, threshold3 = cv2.threshold(img3, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + + img_r[:, :, 0] = threshold1 + img_r[:, :, 1] = threshold1 + img_r[:, :, 2] = threshold1 + return img_r + + def get_image_and_scales(self): + self.image = cv2.imread(self.image_dir) + self.height_org = self.image.shape[0] + self.width_org = self.image.shape[1] + + if self.image.shape[0] < 1000: + self.img_hight_int = 1800 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + elif self.image.shape[0] < 2000 and self.image.shape[0] >= 1000: + self.img_hight_int = 3500 + self.img_width_int = int(self.img_hight_int * 
self.image.shape[1] / float(self.image.shape[0])) + + elif self.image.shape[0] < 3000 and self.image.shape[0] >= 2000: + self.img_hight_int = 4000 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + elif self.image.shape[0] < 4000 and self.image.shape[0] >= 3000: + self.img_hight_int = 4500 + self.img_width_int = int(self.img_hight_int * self.image.shape[1] / float(self.image.shape[0])) + + else: + self.img_hight_int = self.image.shape[0] + self.img_width_int = self.image.shape[1] + + self.scale_y = self.img_hight_int / float(self.image.shape[0]) + self.scale_x = self.img_width_int / float(self.image.shape[1]) + + self.image = self.resize_image(self.image, self.img_hight_int, self.img_width_int) + + def start_new_session_and_model(self, model_dir): + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + + session = tf.InteractiveSession() + model = load_model(model_dir, custom_objects={'mean_iou': self.mean_iou, + 'soft_dice_loss': self.soft_dice_loss, + 'jaccard_distance_loss': self.jaccard_distance_loss, + 'Mean_IOU': self.Mean_IOU}) + + return model, session + + def extract_page(self): + model_page, session_page = self.start_new_session_and_model(self.model_page_dir) + + img_height_page = model_page.layers[len(model_page.layers) - 1].output_shape[1] + img_width_page = model_page.layers[len(model_page.layers) - 1].output_shape[2] + n_classes_page = model_page.layers[len(model_page.layers) - 1].output_shape[3] + + img_org_copy = self.image.copy() + + img = self.otsu_copy(self.image) + + for ii in range(60): + img = cv2.GaussianBlur(img, (15, 15), 0) + + # img=self.image.astype(np.uint8) + # img = cv2.medianBlur(img,5) + + img = img / 255.0 + img = self.resize_image(img, img_height_page, img_width_page) + + label_p_pred = model_page.predict( + img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color = self.color_images(seg, n_classes_page) + + 
imgs = seg_color # /np.max(seg_color)*255#np.repeat(seg_color[:, :, np.newaxis], 3, axis=2) + + imgs = self.resize_image(imgs, img_org_copy.shape[0], img_org_copy.shape[1]) + + # plt.imshow(imgs*255) + # plt.show() + + imgs = imgs.astype(np.uint8) + imgray = cv2.cvtColor(imgs, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + thresh = cv2.dilate(thresh, self.kernel, iterations=30) + contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) + + cnt = contours[np.argmax(cnt_size)] + + x, y, w, h = cv2.boundingRect(cnt) + + box = [x, y, w, h] + + croped_page, page_coord = self.crop_image_inside_box(box, img_org_copy) + + session_page.close() + del model_page + del session_page + + gc.collect() + return croped_page, page_coord + + def extract_text_regions(self, img): + model_region, session_region = self.start_new_session_and_model(self.model_region_dir) + + img_height_region = model_region.layers[len(model_region.layers) - 1].output_shape[1] + img_width_region = model_region.layers[len(model_region.layers) - 1].output_shape[2] + n_classes = model_region.layers[len(model_region.layers) - 1].output_shape[3] + margin = True + if margin: + + width = img_width_region + height = img_height_region + + # offset=int(.1*width) + offset = int(0.03 * width) + + width_mid = width - 2 * offset + height_mid = height - 2 * offset + + img = self.otsu_copy(img) + img = img.astype(np.uint8) + ###img = cv2.medianBlur(img,5) + + # img = cv2.medianBlur(img,5) + + # img=cv2.bilateralFilter(img,9,75,75) + # img=cv2.bilateralFilter(img,9,75,75) + + img = img / 255.0 + + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + if nyf > int(nyf): + nyf = 
int(nyf) + 1 + else: + nyf = int(nyf) + + for i in range(nxf): + for j in range(nyf): + + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + width # (i+1)*width + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + width # (i+1)*width + + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + height # (j+1)*height + elif j > 0: + index_y_d = j * height_mid + index_y_u = index_y_d + height # (j+1)*height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - height + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + label_p_pred = model_region.predict( + img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + + seg_color = seg_color[offset:seg_color.shape[0] - offset, offset:seg_color.shape[1] - offset, :] + seg = seg[offset:seg.shape[0] - offset, offset:seg.shape[1] - offset] + + mask_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset] = seg + prediction_true[index_y_d + offset:index_y_u - offset, index_x_d + offset:index_x_u - offset, + :] = seg_color + + prediction_true = prediction_true.astype(np.uint8) + session_region.close() + + del model_region + del session_region + gc.collect() + return prediction_true + + def get_text_region_contours_and_boxes(self, image): + rgb_class = (1, 1, 1) + mask = np.all(image == rgb_class, axis=-1) + + image = np.repeat(mask[:, :, np.newaxis], 3, axis=2) * 255 + image = image.astype(np.uint8) + + image = cv2.morphologyEx(image, cv2.MORPH_OPEN, self.kernel) + image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, self.kernel) + # image = cv2.erode(image,self.kernel,iterations = 3) + + # image = cv2.dilate(image,self.kernel,iterations = 3) + + imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + _, thresh = cv2.threshold(imgray, 0, 255, 
0) + + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + # commenst_contours=self.filter_contours_area_of_image(thresh,contours,hirarchy,max_area=0.0002,min_area=0.0001) + main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.0001) + + img_comm = np.zeros(thresh.shape) + img_comm_in = cv2.fillPoly(img_comm, pts=main_contours, color=(255, 255, 255)) + + img_comm_in = np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2) + + img_comm_in = img_comm_in.astype(np.uint8) + # img_comm_in_de=self.deskew_images(img_comm_in) + + imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) + + _, thresh = cv2.threshold(imgray, 0, 255, 0) + + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + boxes = [] + contours_new = [] + for jj in range(len(contours)): + if hirarchy[0][jj][2] == -1: + x, y, w, h = cv2.boundingRect(contours[jj]) + boxes.append([x, y, w, h]) + contours_new.append(contours[jj]) + + return boxes, contours_new + + def get_all_image_patches_based_on_text_regions(self, boxes, image_page): + self.all_text_images = [] + self.all_box_coord = [] + for jk in range(len(boxes)): + crop_img, crop_coor = self.crop_image_inside_box(boxes[jk], image_page) + self.all_text_images.append(crop_img) + self.all_box_coord.append(crop_coor) + + def textline_contours(self, img): + model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) + img_height_textline = model_textline.layers[len(model_textline.layers) - 1].output_shape[1] + img_width_textline = model_textline.layers[len(model_textline.layers) - 1].output_shape[2] + n_classes = model_textline.layers[len(model_textline.layers) - 1].output_shape[3] + + img_org = img.copy() + + if img.shape[0] < img_height_textline: + img = cv2.resize(img, (img.shape[1], img_width_textline), interpolation=cv2.INTER_NEAREST) + + if img.shape[1] < img_width_textline: + 
img = cv2.resize(img, (img_height_textline, img.shape[0]), interpolation=cv2.INTER_NEAREST) + + margin = False + if not margin: + + width = img_width_textline + height = img_height_textline + + img = self.otsu_copy(img) + img = img.astype(np.uint8) + # for _ in range(4): + # img = cv2.medianBlur(img,5) + img = img / 255.0 + + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width) + nyf = img_h / float(height) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + if nyf > int(nyf): + nyf = int(nyf) + 1 + else: + nyf = int(nyf) + + for i in range(nxf): + for j in range(nyf): + index_x_d = i * width + index_x_u = (i + 1) * width + + index_y_d = j * height + index_y_u = (j + 1) * height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - height + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + label_p_pred = model_textline.predict( + img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2])) + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color = self.color_images(seg, n_classes) + mask_true[index_y_d:index_y_u, index_x_d:index_x_u] = seg + prediction_true[index_y_d:index_y_u, index_x_d:index_x_u, :] = seg_color + + y_predi = mask_true + y_predi = cv2.resize(y_predi, (img_org.shape[1], img_org.shape[0]), interpolation=cv2.INTER_NEAREST) + return y_predi + + def get_textlines_for_each_textregions(self, textline_mask_tot, boxes): + textline_mask_tot = cv2.erode(textline_mask_tot, self.kernel, iterations=1) + self.area_of_cropped = [] + self.all_text_region_raw = [] + for jk in range(len(boxes)): + crop_img, crop_coor = self.crop_image_inside_box(boxes[jk], + np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) + self.all_text_region_raw.append(crop_img[:, :, 0]) + self.area_of_cropped.append(crop_img.shape[0] * 
crop_img.shape[1]) + + def seperate_lines(self, mada, contour_text_interest, thetha): + (h, w) = mada.shape[:2] + center = (w // 2, h // 2) + M = cv2.getRotationMatrix2D(center, -thetha, 1.0) + x_d = M[0, 2] + y_d = M[1, 2] + + thetha = thetha / 180. * np.pi + rotation_matrix = np.array([[np.cos(thetha), -np.sin(thetha)], [np.sin(thetha), np.cos(thetha)]]) + contour_text_interest_copy = contour_text_interest.copy() + + x_cont = contour_text_interest[:, 0, 0] + y_cont = contour_text_interest[:, 0, 1] + x_cont = x_cont - np.min(x_cont) + y_cont = y_cont - np.min(y_cont) + + x_min_cont = 0 + x_max_cont = mada.shape[1] + y_min_cont = 0 + y_max_cont = mada.shape[0] + + xv = np.linspace(x_min_cont, x_max_cont, 1000) + + mada_n = mada.sum(axis=1) + + first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) + + y = mada_n[:] # [first_nonzero:last_nonzero] + y_help = np.zeros(len(y) + 40) + y_help[20:len(y) + 20] = y + x = np.array(range(len(y))) + + peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) + if len(peaks_real)<=2 and len(peaks_real)>1: + sigma_gaus=10 + else: + sigma_gaus=8 + + + z= gaussian_filter1d(y_help, sigma_gaus) + zneg_rev=-y_help+np.max(y_help) + zneg=np.zeros(len(zneg_rev)+40) + zneg[20:len(zneg_rev)+20]=zneg_rev + zneg= gaussian_filter1d(zneg, sigma_gaus) + + peaks, _ = find_peaks(z, height=0) + peaks_neg, _ = find_peaks(zneg, height=0) + + peaks_neg = peaks_neg - 20 - 20 + peaks = peaks - 20 + + for jj in range(len(peaks_neg)): + if peaks_neg[jj] > len(x) - 1: + peaks_neg[jj] = len(x) - 1 + + for jj in range(len(peaks)): + if peaks[jj] > len(x) - 1: + peaks[jj] = len(x) - 1 + + textline_boxes = [] + textline_boxes_rot = [] + + if len(peaks_neg) == len(peaks) + 1 and len(peaks) >= 3: + for jj in range(len(peaks)): + dis_to_next_up = abs(peaks[jj] - peaks_neg[jj]) + dis_to_next_down = abs(peaks[jj] - peaks_neg[jj + 1]) + + point_up = peaks[jj] + first_nonzero - int(1.1 * dis_to_next_up) ##+int(dis_to_next_up*1./4.0) + 
point_down = peaks[jj] + first_nonzero + int(1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./4.0) + + point_down_narrow = peaks[jj] + first_nonzero + int( + 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) + + if point_down >= mada.shape[0]: + point_down = mada.shape[0] - 2 + + if point_down_narrow >= mada.shape[0]: + point_down_narrow = mada.shape[0] - 2 + + distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) + for mj in range(len(xv))] + distances = np.array(distances) + + xvinside = xv[distances >= 0] + + if len(xvinside) == 0: + x_min = x_min_cont + x_max = x_max_cont + else: + x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) + x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) + + p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) + p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) + p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)]) + p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)]) + + x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d + x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d + x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d + x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) + + elif len(peaks) < 1: + pass + + elif len(peaks) == 1: + x_min = x_min_cont + x_max = x_max_cont + + y_min = y_min_cont + y_max = y_max_cont + + p1 = np.dot(rotation_matrix, [int(x_min), int(y_min)]) + p2 = np.dot(rotation_matrix, [int(x_max), int(y_min)]) + p3 = np.dot(rotation_matrix, [int(x_max), int(y_max)]) + p4 = np.dot(rotation_matrix, [int(x_min), int(y_max)]) + + 
x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d + x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d + x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d + x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + + textline_boxes.append(np.array([[int(x_min), int(y_min)], + [int(x_max), int(y_min)], + [int(x_max), int(y_max)], + [int(x_min), int(y_max)]])) + + + + elif len(peaks) == 2: + dis_to_next = np.abs(peaks[1] - peaks[0]) + for jj in range(len(peaks)): + if jj == 0: + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + if point_up < 0: + point_up = 1 + point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + elif jj == 1: + point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + if point_down >= mada.shape[0]: + point_down = mada.shape[0] - 2 + point_up = peaks[jj] + first_nonzero - int(1. 
/ 1.9 * dis_to_next) + + distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) + for mj in range(len(xv))] + distances = np.array(distances) + + xvinside = xv[distances >= 0] + + if len(xvinside) == 0: + x_min = x_min_cont + x_max = x_max_cont + else: + x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) + x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) + + p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) + p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) + p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)]) + p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)]) + + x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d + x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d + x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d + x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) + else: + for jj in range(len(peaks)): + + if jj == 0: + dis_to_next = peaks[jj + 1] - peaks[jj] + # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + if point_up < 0: + point_up = 1 + # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) + point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) + elif jj == len(peaks) - 1: + dis_to_next = peaks[jj] - peaks[jj - 1] + # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) + point_down = peaks[jj] + first_nonzero + int(1. 
/ 1.9 * dis_to_next) + if point_down >= mada.shape[0]: + point_down = mada.shape[0] - 2 + # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) + else: + dis_to_next_down = peaks[jj + 1] - peaks[jj] + dis_to_next_up = peaks[jj] - peaks[jj - 1] + + point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next_up) + point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next_down) + + distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) + for mj in range(len(xv))] + distances = np.array(distances) + + xvinside = xv[distances >= 0] + + if len(xvinside) == 0: + x_min = x_min_cont + x_max = x_max_cont + else: + x_min = np.min(xvinside) # max(x_min_interest,x_min_cont) + x_max = np.max(xvinside) # min(x_max_interest,x_max_cont) + + p1 = np.dot(rotation_matrix, [int(x_min), int(point_up)]) + p2 = np.dot(rotation_matrix, [int(x_max), int(point_up)]) + p3 = np.dot(rotation_matrix, [int(x_max), int(point_down)]) + p4 = np.dot(rotation_matrix, [int(x_min), int(point_down)]) + + x_min_rot1, point_up_rot1 = p1[0] + x_d, p1[1] + y_d + x_max_rot2, point_up_rot2 = p2[0] + x_d, p2[1] + y_d + x_max_rot3, point_down_rot3 = p3[0] + x_d, p3[1] + y_d + x_min_rot4, point_down_rot4 = p4[0] + x_d, p4[1] + y_d + + textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], + [int(x_max_rot2), int(point_up_rot2)], + [int(x_max_rot3), int(point_down_rot3)], + [int(x_min_rot4), int(point_down_rot4)]])) + + textline_boxes.append(np.array([[int(x_min), int(point_up)], + [int(x_max), int(point_up)], + [int(x_max), int(point_down)], + [int(x_min), int(point_down)]])) + + mada_new = np.zeros((mada.shape[0], mada.shape[1], 3)) + mada_new = cv2.fillPoly(mada_new, pts=textline_boxes, color=(255, 255, 255)) + + mada_new = mada_new.astype(np.uint8) + return mada_new, peaks, textline_boxes_rot + + def textline_contours_postprocessing(self, textline_mask, 
                                         img_patch, slope, contour_text_interest, box_ind):
        """Post-process the textline prediction mask of one text region.

        Cleans the mask morphologically, extracts the textline contours,
        deskews the region by ``slope`` so the individual lines can be
        separated horizontally, and rotates the resulting line polygons
        back into the original (unrotated) region coordinates.

        :param textline_mask: single-channel textline prediction mask of
            the region (non-zero = textline pixel -- TODO confirm caller)
        :param img_patch: denoised image patch of the region;
            NOTE(review): not referenced in this body, kept for signature
        :param slope: deskew angle in degrees determined for this region
        :param contour_text_interest: contour of the surrounding text region
        :param box_ind: bounding box of the region; indexed below as
            [0]=x, [1]=y, [2]=w, [3]=h
        :return: tuple ``(img_p_in, contours_rotated_clean)``: image with
            the final textline polygons filled in, and the cleaned-up,
            back-rotated textline contours (empty list in the fallback
            branch)
        """
        # Blow the 1-channel mask up to a 3-channel 0/255 image so the
        # cv2 color-conversion / morphology calls below can operate on it.
        textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255

        textline_mask = textline_mask.astype(np.uint8)
        kernel = np.ones((5, 5), np.uint8)
        # Open + close + erode to drop speckle noise and detach merged lines.
        textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, kernel)
        textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, kernel)
        textline_mask = cv2.erode(textline_mask, kernel, iterations=1)
        imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY)

        _, thresh = cv2.threshold(imgray, 0, 255, 0)

        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

        contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # "commenst" contours: only the small components (used as a
        # fallback below); main_contours: everything above the minimum
        # relative area.
        commenst_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=0.01,
                                                               min_area=0.003)
        main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003)
        # interior_contours=self.filter_contours_area_of_image_interiors(thresh,contours,hirarchy,max_area=1,min_area=0)

        # Re-render only the accepted contours into a fresh image and
        # extract contours again from that cleaned rendering.
        img_comm = np.zeros(thresh.shape)
        img_comm_in = cv2.fillPoly(img_comm, pts=main_contours, color=(255, 255, 255))
        ###img_comm_in=cv2.fillPoly(img_comm, pts =interior_contours, color=(0,0,0))

        img_comm_in = np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2)
        img_comm_in = img_comm_in.astype(np.uint8)

        imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY)

        _, thresh = cv2.threshold(imgray, 0, 255, 0)

        # NOTE(review): "cv2.cv2.RETR_TREE" resolves (cv2.cv2 is the binary
        # submodule) but is presumably meant to read cv2.RETR_TREE.
        contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        contours_slope = contours  # self.find_polugons_size_filter(contours,median_area=median_area,scaler_up=100,scaler_down=0.5)

        if len(contours_slope) > 0:
            # Concatenate the points of all textline contours into a single
            # point cloud covering the whole region.
            for jv in range(len(contours_slope)):
                new_poly = list(contours_slope[jv])
                if jv == 0:
                    merged_all = new_poly
                else:
                    merged_all = merged_all + new_poly

            merge = np.array(merged_all)

            img_in = np.zeros(textline_mask.shape)
            img_p_in = cv2.fillPoly(img_in, pts=[merge], color=(255, 255, 255))

            rect = cv2.minAreaRect(merge)
            box = cv2.boxPoints(rect)
            # NOTE(review): `box` is not used after this point.
            box = np.int0(box)

            # Deskew the mask; keep a binary single-channel copy of it.
            dst = self.rotate_image(textline_mask, slope)
            dst = dst[:, :, 0]
            dst[dst != 0] = 1

            # Shift the region contour into patch-local coordinates.
            contour_text_copy = contour_text_interest.copy()

            contour_text_copy[:, 0, 0] = contour_text_copy[:, 0, 0] - box_ind[0]  # np.min(contour_text_interest_copy[:,0,0])
            contour_text_copy[:, 0, 1] = contour_text_copy[:, 0, 1] - box_ind[1]

            # Render and rotate the region contour the same way, then take
            # the largest rotated contour as the region outline.
            img_contour = np.zeros((box_ind[3], box_ind[2], 3))
            img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255))

            img_contour_rot = self.rotate_image(img_contour, slope)

            # img_comm_in=np.repeat(img_comm_in[:, :, np.newaxis], 3, axis=2)
            img_contour_rot = img_contour_rot.astype(np.uint8)
            imgrayrot = cv2.cvtColor(img_contour_rot, cv2.COLOR_BGR2GRAY)
            _, threshrot = cv2.threshold(imgrayrot, 0, 255, 0)
            contours_text_rot, _ = cv2.findContours(threshrot.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

            # "Largest" here means most contour points, not largest area.
            len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))]
            ind_big_con = np.argmax(len_con_text_rot)

            textline_maskt = textline_mask[:, :, 0]
            textline_maskt[textline_maskt != 0] = 1

            # Split the deskewed mask into individual lines, then rotate the
            # separated-line image back by -slope into the original pose.
            sep_img, _, contours_rotated_clean = self.seperate_lines(dst, contours_text_rot[ind_big_con], slope)

            dst = self.rotate_image(sep_img, -slope)

            imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)

            _, thresh = cv2.threshold(imgray, 0, 255, 0)

            thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

            found_polygons, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            img_in = np.zeros(textline_mask.shape)
            img_p_in = cv2.fillPoly(img_in, pts=found_polygons, color=(255, 255, 255))
        else:
            # No usable contour found: fall back to the small components,
            # slightly dilated, and report no cleaned line contours.
            img_in = np.zeros(textline_mask.shape)

            img_p_in = cv2.fillPoly(img_in, pts=commenst_contours, color=(255, 255, 255))
            img_p_in = cv2.dilate(img_p_in, kernel, iterations=1)
            contours_rotated_clean = []

        return img_p_in, contours_rotated_clean

    def textline_contours_to_get_slope_correctly(self, textline_mask, img_patch, contour_interest):
        """Estimate the skew angle (degrees) of one text region.

        Separates the textlines of the (unrotated) mask to measure the
        mean line spacing, then fits a straight line through every contour
        that is flat (height <= mean spacing) and wide (>= 30% of the
        region width) and averages the resulting angles.

        :param textline_mask: single-channel textline prediction mask
        :param img_patch: denoised image patch; NOTE(review): not used in
            this body, kept for the established signature
        :param contour_interest: contour of the text region
        :return: estimated slope in degrees; 999 signals "no reliable
            estimate" (the caller substitutes the biggest region's slope)
        """
        slope_new = 0  # deskew_images(img_patch)

        # Same mask clean-up as in textline_contours_postprocessing, but
        # using the shared instance kernel.
        textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255

        textline_mask = textline_mask.astype(np.uint8)
        textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_OPEN, self.kernel)
        textline_mask = cv2.morphologyEx(textline_mask, cv2.MORPH_CLOSE, self.kernel)
        textline_mask = cv2.erode(textline_mask, self.kernel, iterations=1)
        imgray = cv2.cvtColor(textline_mask, cv2.COLOR_BGR2GRAY)
        _, thresh = cv2.threshold(imgray, 0, 255, 0)

        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, self.kernel)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, self.kernel)

        contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # commenst_contours=self.filter_contours_area_of_image(thresh,contours,hirarchy,max_area=0.01,min_area=0.003)
        main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.003)
        # interior_contours=self.filter_contours_area_of_image_interiors(thresh,contours,hirarchy,max_area=1,min_area=0)

        textline_maskt = textline_mask[:, :, 0]
        textline_maskt[textline_maskt != 0] = 1

        # Separate lines with slope 0 only to obtain the peak positions,
        # i.e. the vertical centers of the lines.
        _, peaks_point, _ = self.seperate_lines(textline_maskt, contour_interest, slope_new)

        # Mean vertical distance between adjacent line peaks.
        mean_dis = np.mean(np.diff(peaks_point))
        # mean_dis=np.median(np.diff(peaks_point))

        len_x = thresh.shape[1]
        # print(len_x,mean_dis,'x')

        slope_lines = []
        contours_slope_new = []
        for kk in range(len(main_contours)):

            xminh = np.min(main_contours[kk][:, 0])
            xmaxh = np.max(main_contours[kk][:, 0])

            yminh = np.min(main_contours[kk][:, 1])
            ymaxh = np.max(main_contours[kk][:, 1])

            # print(xminh,xmaxh ,yminh,ymaxh,ymaxh-yminh)

            # A contour counts as a usable textline sample if it is flat
            # (no taller than the mean line spacing) and wide.
            if ymaxh - yminh <= mean_dis and (
                    xmaxh - xminh) >= 0.3 * len_x:  # xminh>=0.05*len_x and xminh<=0.4*len_x and xmaxh<=0.95*len_x and xmaxh>=0.6*len_x:
                contours_slope_new.append(main_contours[kk])

                rows, cols = thresh.shape[:2]
                [vx, vy, x, y] = cv2.fitLine(main_contours[kk], cv2.DIST_L2, 0, 0.01, 0.01)

                # NOTE(review): (vy/vx)/pi*180 treats the direction ratio
                # as an angle in radians (small-angle approximation of
                # atan) -- confirm this is intended.
                slope_lines.append((vy / vx) / np.pi * 180)

                if len(slope_lines) >= 2:

                    slope = np.mean(slope_lines)  # slope_true/np.pi*180
                else:
                    slope = 999

            else:
                slope = 0

        # NOTE(review): `slope` keeps whatever the *last* loop iteration
        # assigned, and is unbound when main_contours is empty -- looks
        # fragile; confirm intended behaviour.
        return slope

    def get_slopes_for_each_text_region(self, contours):
        """Compute a deskew angle for every detected text region.

        Derives the slope of the region with the largest cropped area
        first and uses it as the fallback wherever a region's own
        estimate comes back as 999 ("unknown"); estimates larger than
        2.5 degrees in magnitude are treated as outliers and reset to 0.
        Fills ``self.slopes`` (one entry per ``self.all_text_images``).

        :param contours: per-region text contours, parallel to
            ``self.all_text_images`` / ``self.all_text_region_raw``
        """
        # first let find the slop for biggest patch of text region

        index_max_area = np.argmax(self.area_of_cropped)

        # Denoising chain: box blur -> median -> Gaussian -> Otsu binarisation.
        denoised = cv2.blur(self.all_text_images[index_max_area], (5, 5))  # otsu_copy(crop_img)#
        denoised = cv2.medianBlur(denoised, 5)  # cv2.GaussianBlur(crop_img, (5, 5), 0)
        denoised = cv2.GaussianBlur(denoised, (5, 5), 0)
        denoised = self.otsu_copy(denoised)
        denoised = denoised.astype(np.uint8)
        slope_biggest = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[index_max_area],
                                                                      denoised, contours[index_max_area])

        # Clamp an implausibly large estimate for the reference region.
        if np.abs(slope_biggest) > 2.5:
            slope_biggest = 0

        self.slopes = []
        for mv in range(len(self.all_text_images)):
            # Same denoising chain as above, per region.
            denoised = cv2.blur(self.all_text_images[mv], (5, 5))  # otsu_copy(crop_img)#
            denoised = cv2.medianBlur(denoised, 5)  # cv2.GaussianBlur(crop_img, (5, 5), 0)
            denoised = cv2.GaussianBlur(denoised, (5, 5), 0)
            denoised = self.otsu_copy(denoised)
            denoised = denoised.astype(np.uint8)
            slope_for_all = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv], denoised,
                                                                          contours[mv])
            # text_patch_processed=textline_contours_postprocessing(gada)

            # Outliers -> 0; "unknown" (999) -> slope of the biggest region.
            if np.abs(slope_for_all) > 2.5 and slope_for_all != 999:
                slope_for_all = 0
            elif slope_for_all == 999:
                slope_for_all = slope_biggest
            self.slopes.append(slope_for_all)

    def deskew_textline_patches(self, contours, boxes):
        """Deskew every text region and collect its final textline polygons.

        Fills ``self.all_text_region_processed`` and
        ``self.all_found_texline_polygons`` (one entry per region).

        :param contours: per-region text contours
        :param boxes: per-region bounding boxes
        """
        self.all_text_region_processed = []
        self.all_found_texline_polygons = []

        for jj in range(len(self.all_text_images)):
            # print(all_text_images[jj][0,0,0],np.unique(all_text_images[jj][:,:,0]))
            ###gada=self.all_text_images[jj][:,:,0]
            ###gada=(gada[:,:]==0)*1
            # print(gada[0,0])

            # Denoising chain (box blur -> median -> Gaussian -> Otsu),
            # matching the preparation used for the slope estimation.
            denoised = cv2.blur(self.all_text_images[jj], (5, 5))  # otsu_copy(crop_img)#
            denoised = cv2.medianBlur(denoised, 5)  # cv2.GaussianBlur(crop_img, (5, 5), 0)
            denoised = cv2.GaussianBlur(denoised, (5, 5), 0)
            denoised = self.otsu_copy(denoised)
            denoised = denoised.astype(np.uint8)
            # Deskew this region by its previously computed slope and get
            # the cleaned, back-rotated textline contours.
            text_patch_processed, cnt_clean_rot = self.textline_contours_postprocessing(self.all_text_region_raw[jj]
                                                                                        , denoised, self.slopes[jj],
                                                                                        contours[jj], boxes[jj])
            # text_patch_processed=textline_contours_postprocessing(gada)
            self.all_text_region_processed.append(text_patch_processed)

            text_patch_processed = text_patch_processed.astype(np.uint8)
            imgray = cv2.cvtColor(text_patch_processed, cv2.COLOR_BGR2GRAY)

            _, thresh = cv2.threshold(imgray, 0, 255, 0)

            # NOTE(review): self.found_polygons is computed but it is
            # cnt_clean_rot that gets collected below -- the contour
            # extraction here appears to be leftover.
            self.found_polygons, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            ####all_found_texline_polygons.append(found_polygons)cnt_clean_rot
            self.all_found_texline_polygons.append(cnt_clean_rot)

            # img_v=np.zeros(text_patch_processed.shape)
            # img_v=cv2.fillPoly(img_v, pts =found_polygons, color=(255,255,255))
            # sumi=np.sum(np.sum(self.all_text_images[jj],axis=2),axis=1)

    def write_into_page_xml(self, contours, page_coord, dir_of_image):
        """Serialise the detected regions and textlines as PAGE XML.

        Builds a PcGts tree (PAGE 2017-07-15 schema) with one TextRegion
        per entry of ``contours`` (the first region is typed "heading",
        all others "paragraph") and nested TextLine elements taken from
        ``self.all_found_texline_polygons``.  Coordinates are shifted by
        ``page_coord`` / ``self.all_box_coord`` and divided by the scale
        factors to map back into the original image.

        :param contours: polygons of the detected text regions
        :param page_coord: offset of the cropped page inside the original
            image; indexed below as [0] = y offset and [2] = x offset
        :param dir_of_image: output path prefix for the written
            ``<name>.xml`` file
        """
        found_polygons_text_region = contours
        data = ET.Element('PcGts')

        data.set('xmlns', "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15")
        data.set('xmlns:xsi', "http://www.w3.org/2001/XMLSchema-instance")
        data.set('xsi:schemaLocation', "http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15")
        # data.set('http',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2018-07-15/pagecontent.xsd")

        metadata = ET.SubElement(data, 'Metadata')

        author = ET.SubElement(metadata, 'Creator')
        # NOTE(review): hard-coded creator name and timestamps below --
        # presumably placeholders; confirm whether they should be dynamic.
        author.text = 'Vahid'

        created = ET.SubElement(metadata, 'Created')
        created.text = '2019-06-17T18:15:12'

        changetime = ET.SubElement(metadata, 'LastChange')
        changetime.text = '2019-06-17T18:15:12'

        page = ET.SubElement(data, 'Page')

        # NOTE(review): assumes the input image is named <f_name>.tif --
        # TODO confirm for non-TIFF inputs.
        page.set('imageFilename', self.f_name + '.tif')
        page.set('imageHeight', str(self.height_org))
        page.set('imageWidth', str(self.width_org))

        # One running counter shared by region ids ('r<N>') and line ids
        # ('l<N>'), so ids are unique across the whole document.
        id_indexer = 0

        for mm in range(len(found_polygons_text_region)):
            textregion = ET.SubElement(page, 'TextRegion')

            textregion.set('id', 'r' + str(id_indexer))
            id_indexer += 1

            # The first detected region is assumed to be the heading.
            if mm == 0:
                textregion.set('type', 'heading')
            else:
                textregion.set('type', 'paragraph')
            coord_text = ET.SubElement(textregion, 'Coords')

            # Build the PAGE "points" attribute: space-separated "x,y"
            # pairs, mapped back to original-image coordinates.  Contour
            # points may be stored flat ([x, y]) or OpenCV-style
            # ([[x, y]]), hence the two branches.
            points_co = ''
            for lmm in range(len(found_polygons_text_region[mm])):

                if len(found_polygons_text_region[mm][lmm]) == 2:
                    points_co = points_co + str(
                        int((found_polygons_text_region[mm][lmm][0] + page_coord[2]) / self.scale_x))
                    points_co = points_co + ','
                    points_co = points_co + str(
                        int((found_polygons_text_region[mm][lmm][1] + page_coord[0]) / self.scale_y))
                else:
                    points_co = points_co + str(
                        int((found_polygons_text_region[mm][lmm][0][0] + page_coord[2]) / self.scale_x))
                    points_co = points_co + ','
                    points_co = points_co + str(
                        int((found_polygons_text_region[mm][lmm][0][1] + page_coord[0]) / self.scale_y))

                if lmm < (len(found_polygons_text_region[mm]) - 1):
                    points_co = points_co + ' '
            # print(points_co)
            coord_text.set('points', points_co)

            for j in range(len(self.all_found_texline_polygons[mm])):

                textline = ET.SubElement(textregion, 'TextLine')

                textline.set('id', 'l' + str(id_indexer))

                id_indexer += 1

                coord = ET.SubElement(textline, 'Coords')

                texteq = ET.SubElement(textline, 'TextEquiv')

                # No OCR is performed here; emit a blank placeholder text.
                uni = ET.SubElement(texteq, 'Unicode')
                uni.text = ' '

                # points = ET.SubElement(coord, 'Points')

                # Textline points additionally carry the offset of the
                # region's bounding box (self.all_box_coord[mm]).
                points_co = ''
                for l in range(len(self.all_found_texline_polygons[mm][j])):
                    # point = ET.SubElement(coord, 'Point')

                    # point.set('x',str(found_polygons[j][l][0]))
                    # point.set('y',str(found_polygons[j][l][1]))
                    if len(self.all_found_texline_polygons[mm][j][l]) == 2:
                        points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0] + page_coord[2] +
                                                         self.all_box_coord[mm][2]) / self.scale_x))
                        points_co = points_co + ','
                        points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][1] + page_coord[0] +
                                                         self.all_box_coord[mm][0]) / self.scale_y))
                    else:
                        points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0][0] + page_coord[2] +
                                                         self.all_box_coord[mm][2]) / self.scale_x))
                        points_co = points_co + ','
                        points_co = points_co + str(int((self.all_found_texline_polygons[mm][j][l][0][1] + page_coord[0] +
                                                         self.all_box_coord[mm][0]) / self.scale_y))

                    if l < (len(self.all_found_texline_polygons[mm][j]) - 1):
                        points_co = points_co + ' '
                # print(points_co)
                coord.set('points', points_co)

            texteqreg = ET.SubElement(textregion, 'TextEquiv')

            unireg = ET.SubElement(texteqreg, 'Unicode')
            unireg.text = ' '

        # NOTE(review): plain string concatenation -- assumes dir_of_image
        # already ends with a path separator; TODO confirm / use os.path.join.
        tree = ET.ElementTree(data)
        tree.write(dir_of_image + self.f_name + ".xml")

    def run(self):
        """Execute the full pipeline on the configured image.

        Steps: load image and scales, crop the page, detect text regions,
        cut out the region patches, predict the textline mask, assign
        textlines to regions, estimate per-region slopes, deskew, and
        finally write the PAGE XML into ``self.dir_out``.
        """
        self.get_image_and_scales()
        image_page,page_coord=self.extract_page()
        text_regions=self.extract_text_regions(image_page)
        boxes,contours=self.get_text_region_contours_and_boxes(text_regions)
        self.get_all_image_patches_based_on_text_regions(boxes,image_page)
        textline_mask_tot=self.textline_contours(image_page)

        self.get_textlines_for_each_textregions(textline_mask_tot,boxes)
        self.get_slopes_for_each_text_region(contours)
        self.deskew_textline_patches(contours, boxes)
        self.write_into_page_xml(contours, page_coord, self.dir_out)


def main():
    """Command-line entry point: parse -i/-o/-m options and run the detector."""
    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '--image', dest='inp1', default=None, help='directory of image.')
    parser.add_argument('-o', '--out', dest='inp2', default=None, help='directory to write output xml data.')
    parser.add_argument('-m', '--model', dest='inp3', default=None, help='directory of models.')

    options = parser.parse_args()

    # NOTE(review): `possibles` is built but never read -- dead code.
    possibles = globals()
    possibles.update(locals())
    x = textlineerkenner(options.inp1, options.inp2, options.inp3)
    x.run()


if __name__ == "__main__":
    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4b9bf38
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+opencv-python
+numpy<=1.14.5
+matplotlib
+seaborn
+tqdm
+keras
+shapely
+scikit-learn
+tensorflow-gpu < 2.0
+scipy
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..0b0e9df
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,34 @@
+from io import open
+from setuptools import find_packages, setup
+
+with open('requirements.txt') as fp:
+    install_requires = fp.read()
+
+setup(
+    name="qurator-sbb-textline",
+    version="0.0.1",
+    author="The Qurator Team",
+    author_email="qurator@sbb.spk-berlin.de",
+    description="Qurator",
+    long_description=open("README.md", "r", encoding='utf-8').read(),
+    long_description_content_type="text/markdown",
+    keywords='qurator',
+    license='Apache',
+    url="https://qurator.ai",
+    packages=find_packages(exclude=["*.tests", "*.tests.*",
+                                    "tests.*", "tests"]),
+    install_requires=install_requires,
+    entry_points={
+        'console_scripts': [
+            "text_line_recognition=main:main",
+        ]
+    },
+    python_requires='>=3.6.0',
+    tests_require=['pytest'],
+    classifiers=[
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python :: 3',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+    ],
+)
\ No newline at end of file