From 21a47c081acc5c0402d3cb1ff1dcd38aad595d4e Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 23 Feb 2024 20:18:38 +0100 Subject: [PATCH 01/34] refactor namespace to `eynollah` --- {qurator => eynollah}/.gitkeep | 0 {qurator => eynollah}/__init__.py | 0 {qurator => eynollah}/eynollah/__init__.py | 0 {qurator => eynollah}/eynollah/cli.py | 2 +- {qurator => eynollah}/eynollah/eynollah.py | 0 {qurator => eynollah}/eynollah/ocrd-tool.json | 0 {qurator => eynollah}/eynollah/ocrd_cli.py | 0 {qurator => eynollah}/eynollah/plot.py | 0 {qurator => eynollah}/eynollah/processor.py | 0 {qurator => eynollah}/eynollah/utils/__init__.py | 0 {qurator => eynollah}/eynollah/utils/contour.py | 0 {qurator => eynollah}/eynollah/utils/counter.py | 0 .../eynollah/utils/drop_capitals.py | 0 {qurator => eynollah}/eynollah/utils/is_nan.py | 0 {qurator => eynollah}/eynollah/utils/marginals.py | 0 {qurator => eynollah}/eynollah/utils/pil_cv2.py | 0 {qurator => eynollah}/eynollah/utils/resize.py | 0 {qurator => eynollah}/eynollah/utils/rotate.py | 0 .../eynollah/utils/separate_lines.py | 0 {qurator => eynollah}/eynollah/utils/xml.py | 0 {qurator => eynollah}/eynollah/writer.py | 0 setup.py | 6 +++--- tests/test_counter.py | 2 +- tests/test_dpi.py | 2 +- tests/test_run.py | 2 +- tests/test_smoke.py | 12 ++++++------ tests/test_xml.py | 2 +- 27 files changed, 14 insertions(+), 14 deletions(-) rename {qurator => eynollah}/.gitkeep (100%) rename {qurator => eynollah}/__init__.py (100%) rename {qurator => eynollah}/eynollah/__init__.py (100%) rename {qurator => eynollah}/eynollah/cli.py (99%) rename {qurator => eynollah}/eynollah/eynollah.py (100%) rename {qurator => eynollah}/eynollah/ocrd-tool.json (100%) rename {qurator => eynollah}/eynollah/ocrd_cli.py (100%) rename {qurator => eynollah}/eynollah/plot.py (100%) rename {qurator => eynollah}/eynollah/processor.py (100%) rename {qurator => eynollah}/eynollah/utils/__init__.py (100%) rename {qurator => eynollah}/eynollah/utils/contour.py (100%) rename {qurator => eynollah}/eynollah/utils/counter.py (100%) rename {qurator => eynollah}/eynollah/utils/drop_capitals.py (100%) rename {qurator => eynollah}/eynollah/utils/is_nan.py (100%) rename {qurator => eynollah}/eynollah/utils/marginals.py (100%) rename {qurator => eynollah}/eynollah/utils/pil_cv2.py (100%) rename {qurator => eynollah}/eynollah/utils/resize.py (100%) rename {qurator => eynollah}/eynollah/utils/rotate.py (100%) rename {qurator => eynollah}/eynollah/utils/separate_lines.py (100%) rename {qurator => eynollah}/eynollah/utils/xml.py (100%) rename {qurator => eynollah}/eynollah/writer.py (100%) diff --git a/qurator/.gitkeep b/eynollah/.gitkeep similarity index 100% rename from qurator/.gitkeep rename to eynollah/.gitkeep diff --git a/qurator/__init__.py b/eynollah/__init__.py similarity index 100% rename from qurator/__init__.py rename to eynollah/__init__.py diff --git a/qurator/eynollah/__init__.py b/eynollah/eynollah/__init__.py similarity index 100% rename from qurator/eynollah/__init__.py rename to eynollah/eynollah/__init__.py diff --git a/qurator/eynollah/cli.py b/eynollah/eynollah/cli.py similarity index 99% rename from qurator/eynollah/cli.py rename to eynollah/eynollah/cli.py index a2a2ad0..b720d83 100644 --- a/qurator/eynollah/cli.py +++ b/eynollah/eynollah/cli.py @@ -1,7 +1,7 @@ import sys import click from ocrd_utils import initLogging, setOverrideLogLevel -from qurator.eynollah.eynollah import Eynollah +from eynollah.eynollah.eynollah import Eynollah 
@click.command() diff --git a/qurator/eynollah/eynollah.py b/eynollah/eynollah/eynollah.py similarity index 100% rename from qurator/eynollah/eynollah.py rename to eynollah/eynollah/eynollah.py diff --git a/qurator/eynollah/ocrd-tool.json b/eynollah/eynollah/ocrd-tool.json similarity index 100% rename from qurator/eynollah/ocrd-tool.json rename to eynollah/eynollah/ocrd-tool.json diff --git a/qurator/eynollah/ocrd_cli.py b/eynollah/eynollah/ocrd_cli.py similarity index 100% rename from qurator/eynollah/ocrd_cli.py rename to eynollah/eynollah/ocrd_cli.py diff --git a/qurator/eynollah/plot.py b/eynollah/eynollah/plot.py similarity index 100% rename from qurator/eynollah/plot.py rename to eynollah/eynollah/plot.py diff --git a/qurator/eynollah/processor.py b/eynollah/eynollah/processor.py similarity index 100% rename from qurator/eynollah/processor.py rename to eynollah/eynollah/processor.py diff --git a/qurator/eynollah/utils/__init__.py b/eynollah/eynollah/utils/__init__.py similarity index 100% rename from qurator/eynollah/utils/__init__.py rename to eynollah/eynollah/utils/__init__.py diff --git a/qurator/eynollah/utils/contour.py b/eynollah/eynollah/utils/contour.py similarity index 100% rename from qurator/eynollah/utils/contour.py rename to eynollah/eynollah/utils/contour.py diff --git a/qurator/eynollah/utils/counter.py b/eynollah/eynollah/utils/counter.py similarity index 100% rename from qurator/eynollah/utils/counter.py rename to eynollah/eynollah/utils/counter.py diff --git a/qurator/eynollah/utils/drop_capitals.py b/eynollah/eynollah/utils/drop_capitals.py similarity index 100% rename from qurator/eynollah/utils/drop_capitals.py rename to eynollah/eynollah/utils/drop_capitals.py diff --git a/qurator/eynollah/utils/is_nan.py b/eynollah/eynollah/utils/is_nan.py similarity index 100% rename from qurator/eynollah/utils/is_nan.py rename to eynollah/eynollah/utils/is_nan.py diff --git a/qurator/eynollah/utils/marginals.py b/eynollah/eynollah/utils/marginals.py similarity index 100% rename from qurator/eynollah/utils/marginals.py rename to eynollah/eynollah/utils/marginals.py diff --git a/qurator/eynollah/utils/pil_cv2.py b/eynollah/eynollah/utils/pil_cv2.py similarity index 100% rename from qurator/eynollah/utils/pil_cv2.py rename to eynollah/eynollah/utils/pil_cv2.py diff --git a/qurator/eynollah/utils/resize.py b/eynollah/eynollah/utils/resize.py similarity index 100% rename from qurator/eynollah/utils/resize.py rename to eynollah/eynollah/utils/resize.py diff --git a/qurator/eynollah/utils/rotate.py b/eynollah/eynollah/utils/rotate.py similarity index 100% rename from qurator/eynollah/utils/rotate.py rename to eynollah/eynollah/utils/rotate.py diff --git a/qurator/eynollah/utils/separate_lines.py b/eynollah/eynollah/utils/separate_lines.py similarity index 100% rename from qurator/eynollah/utils/separate_lines.py rename to eynollah/eynollah/utils/separate_lines.py diff --git a/qurator/eynollah/utils/xml.py b/eynollah/eynollah/utils/xml.py similarity index 100% rename from qurator/eynollah/utils/xml.py rename to eynollah/eynollah/utils/xml.py diff --git a/qurator/eynollah/writer.py b/eynollah/eynollah/writer.py similarity index 100% rename from qurator/eynollah/writer.py rename to eynollah/eynollah/writer.py diff --git a/setup.py b/setup.py index 9abf158..bc836df 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setup( author='Vahid Rezanezhad', url='https://github.com/qurator-spk/eynollah', license='Apache License 2.0', - namespace_packages=['qurator'], + 
namespace_packages=['eynollah'], packages=find_packages(exclude=['tests']), install_requires=install_requires, package_data={ @@ -21,8 +21,8 @@ setup( }, entry_points={ 'console_scripts': [ - 'eynollah=qurator.eynollah.cli:main', - 'ocrd-eynollah-segment=qurator.eynollah.ocrd_cli:main', + 'eynollah=eynollah.eynollah.cli:main', + 'ocrd-eynollah-segment=eynollah.eynollah.ocrd_cli:main', ] }, ) diff --git a/tests/test_counter.py b/tests/test_counter.py index 8ef0756..125b7c1 100644 --- a/tests/test_counter.py +++ b/tests/test_counter.py @@ -1,5 +1,5 @@ from tests.base import main -from qurator.eynollah.utils.counter import EynollahIdCounter +from eynollah.eynollah.utils.counter import EynollahIdCounter def test_counter_string(): c = EynollahIdCounter() diff --git a/tests/test_dpi.py b/tests/test_dpi.py index 510ffc5..6317cd4 100644 --- a/tests/test_dpi.py +++ b/tests/test_dpi.py @@ -1,6 +1,6 @@ import cv2 from pathlib import Path -from qurator.eynollah.utils.pil_cv2 import check_dpi +from eynollah.eynollah.utils.pil_cv2 import check_dpi from tests.base import main def test_dpi(): diff --git a/tests/test_run.py b/tests/test_run.py index b1137e7..eeee964 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -2,7 +2,7 @@ from os import environ from pathlib import Path from ocrd_utils import pushd_popd from tests.base import CapturingTestCase as TestCase, main -from qurator.eynollah.cli import main as eynollah_cli +from eynollah.eynollah.cli import main as eynollah_cli testdir = Path(__file__).parent.resolve() diff --git a/tests/test_smoke.py b/tests/test_smoke.py index d069479..b0a7846 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1,7 +1,7 @@ def test_utils_import(): - import qurator.eynollah.utils - import qurator.eynollah.utils.contour - import qurator.eynollah.utils.drop_capitals - import qurator.eynollah.utils.drop_capitals - import qurator.eynollah.utils.is_nan - import qurator.eynollah.utils.rotate + import eynollah.eynollah.utils + import eynollah.eynollah.utils.contour + import eynollah.eynollah.utils.drop_capitals + import eynollah.eynollah.utils.drop_capitals + import eynollah.eynollah.utils.is_nan + import eynollah.eynollah.utils.rotate diff --git a/tests/test_xml.py b/tests/test_xml.py index 8422fd1..c8bac53 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -1,5 +1,5 @@ from pytest import main -from qurator.eynollah.utils.xml import create_page_xml +from eynollah.eynollah.utils.xml import create_page_xml from ocrd_models.ocrd_page import to_xml PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15' From a08020dba10e4a25a33b53f615de7ce998d14cf8 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 23 Feb 2024 20:22:21 +0100 Subject: [PATCH 02/34] copy sbb_pixelwise_segmentation into `train` --- eynollah/eynollah/train/README.md | 67 +++ ..._model_load_pretrained_weights_and_save.py | 33 ++ eynollah/eynollah/train/config_params.json | 30 ++ eynollah/eynollah/train/metrics.py | 338 ++++++++++++ eynollah/eynollah/train/models.py | 317 +++++++++++ eynollah/eynollah/train/train.py | 238 +++++++++ eynollah/eynollah/train/utils.py | 497 ++++++++++++++++++ 7 files changed, 1520 insertions(+) create mode 100644 eynollah/eynollah/train/README.md create mode 100644 eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py create mode 100644 eynollah/eynollah/train/config_params.json create mode 100644 eynollah/eynollah/train/metrics.py create mode 100644 eynollah/eynollah/train/models.py create 
mode 100644 eynollah/eynollah/train/train.py
 create mode 100644 eynollah/eynollah/train/utils.py

diff --git a/eynollah/eynollah/train/README.md b/eynollah/eynollah/train/README.md
new file mode 100644
index 0000000..8acfa12
--- /dev/null
+++ b/eynollah/eynollah/train/README.md
@@ -0,0 +1,67 @@
+# Pixelwise Segmentation
+> Pixelwise segmentation for document images
+
+## Introduction
+This repository contains the source code for training an encoder model for document image segmentation.
+
+## Installation
+Either clone the repository via `git clone https://github.com/qurator-spk/sbb_pixelwise_segmentation.git` or download and unpack the [ZIP](https://github.com/qurator-spk/sbb_pixelwise_segmentation/archive/master.zip).
+
+### Pretrained encoder
+Download our pretrained weights and add them to a ``pretrained_model`` folder:
+https://qurator-data.de/sbb_pixelwise_segmentation/pretrained_encoder/
+
+## Usage
+
+### Train
+To train a model, run: ``python train.py with config_params.json``
+
+### Ground truth format
+Labels for each pixel are identified by a number, so in a binary case ``n_classes`` should be set to ``2`` and the labels should be ``0`` and ``1`` for each class and pixel.
+
+In the multiclass case, just set ``n_classes`` to the number of classes you have and produce labels with pixel values from ``0, 1, 2, ..., n_classes-1``.
+Labels must be PNG files.
+Our labels are 3-channel PNG images, but only the information of the first channel is used.
+If you have an image label with height and width of 10, for a binary case the first channel should look like this (see the example script at the end of this README):
+
+    Label: [ [1, 0, 0, 1, 1, 0, 0, 1, 0, 0],
+             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+             ...,
+             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] ]
+
+    This means that you have a `10*10*3` image and `pixel[0,0]` belongs
+    to class `1` and `pixel[0,1]` belongs to class `0`.
+
+    A small sample of training data for a binarization experiment can be found here: [Training data sample](https://qurator-data.de/~vahid.rezanezhad/binarization_training_data_sample/), which contains images and labels folders.
+
+### Training, evaluation and output
+The train and evaluation folders should contain subfolders of images and labels.
+The output folder should be an empty folder where the output model will be written to.
+
+### Parameter configuration
+* patches: If you want to break input images into smaller patches (the input size of the model), set this parameter to ``true``. If the model should see the whole image at once, as in page extraction, set patches to ``false``.
+* n_batch: Number of batches at each iteration.
+* n_classes: Number of classes. In the case of binary classification this should be 2.
+* n_epochs: Number of epochs.
+* input_height: The height of the model's input.
+* input_width: The width of the model's input.
+* weight_decay: Weight decay of the l2 regularization of model layers.
+* augmentation: If you want to apply any kind of augmentation, this parameter should first be set to ``true``.
+* flip_aug: If ``true``, different types of flip will be applied to the image. The flip types are given with "flip_index" in the train.py file.
+* blur_aug: If ``true``, different types of blurring will be applied to the image. The blur types are given with "blur_k" in the train.py file.
+* scaling: If ``true``, scaling will be applied to the image. The scales are given with "scales" in the train.py file.
+* rotation_not_90: If ``true``, rotation (not by 90 degrees) will be applied to the image. The rotation angles are given with "thetha" in the train.py file.
+* rotation: If ``true``, 90 degree rotation will be applied to the image.
+* binarization: If ``true``, Otsu thresholding will be applied to augment the input data with binarized images.
+* scaling_bluring: If ``true``, a combination of scaling and blurring will be applied to the image.
+* scaling_binarization: If ``true``, a combination of scaling and binarization will be applied to the image.
+* scaling_flip: If ``true``, a combination of scaling and flipping will be applied to the image.
+* continue_training: If ``true``, you have already trained a model and would like to continue the training. You then need to provide the directory of the trained model with "dir_of_start_model" and an index for naming the models. For example, if you have already trained for 3 epochs, your last index is 2, and if you want to continue from model_1.h5 you can set "index_start" to 3 to start naming models from index 3.
+* weighted_loss: If ``true``, a weighted categorical cross-entropy will be applied as the loss function. Be careful: if this is set to ``true``, the parameter "is_loss_soft_dice" should be ``false``.
+* data_is_provided: If you have already provided the input data, you can set this to ``true``. Make sure that the train and eval data are in "dir_output": once training data is provided, it is resized, augmented and then written to the train and eval sub-directories in "dir_output".
+* dir_train: The directory of raw images and labels ("dir_train" should include the two subdirectories images and labels), i.e. data that is not yet prepared (not resized and not augmented) for training the model. When this tool is run, the raw data is transformed to the size needed by the model and written to the train and eval directories in "dir_output", each of which includes "images" and "labels" sub-directories.
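+
+### Example: writing a label image
+A minimal sketch of the ground truth format described above, for the binary case. The file name and the ``labels`` folder are only examples; it assumes ``numpy`` and ``opencv-python`` are installed:
+
+    import os
+    import cv2
+    import numpy as np
+
+    os.makedirs('labels', exist_ok=True)
+
+    # a 10x10 label with a few pixels of class 1, the rest class 0 (n_classes = 2)
+    label = np.zeros((10, 10), dtype=np.uint8)
+    label[0, 0] = 1
+    label[0, 3:5] = 1
+
+    # labels are saved as 3-channel PNGs; only the first channel is read
+    cv2.imwrite('labels/example_0001.png', np.stack([label] * 3, axis=-1))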
+ + diff --git a/eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py b/eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py new file mode 100644 index 0000000..251e698 --- /dev/null +++ b/eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py @@ -0,0 +1,33 @@ +import os +import sys +import tensorflow as tf +import keras , warnings +from keras.optimizers import * +from sacred import Experiment +from models import * +from utils import * +from metrics import * + + + + +def configuration(): + gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) + session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) + + +if __name__=='__main__': + n_classes = 2 + input_height = 224 + input_width = 448 + weight_decay = 1e-6 + pretraining = False + dir_of_weights = 'model_bin_sbb_ens.h5' + + #configuration() + + model = resnet50_unet(n_classes, input_height, input_width,weight_decay,pretraining) + model.load_weights(dir_of_weights) + model.save('./name_in_another_python_version.h5') + + diff --git a/eynollah/eynollah/train/config_params.json b/eynollah/eynollah/train/config_params.json new file mode 100644 index 0000000..eaa50e1 --- /dev/null +++ b/eynollah/eynollah/train/config_params.json @@ -0,0 +1,30 @@ +{ + "n_classes" : 3, + "n_epochs" : 2, + "input_height" : 448, + "input_width" : 672, + "weight_decay" : 1e-6, + "n_batch" : 2, + "learning_rate": 1e-4, + "patches" : true, + "pretraining" : true, + "augmentation" : false, + "flip_aug" : false, + "blur_aug" : false, + "scaling" : true, + "binarization" : false, + "scaling_bluring" : false, + "scaling_binarization" : false, + "scaling_flip" : false, + "rotation": false, + "rotation_not_90": false, + "continue_training": false, + "index_start": 0, + "dir_of_start_model": " ", + "weighted_loss": false, + "is_loss_soft_dice": false, + "data_is_provided": false, + "dir_train": "/home/vahid/Documents/handwrittens_train/train", + "dir_eval": "/home/vahid/Documents/handwrittens_train/eval", + "dir_output": "/home/vahid/Documents/handwrittens_train/output" +} diff --git a/eynollah/eynollah/train/metrics.py b/eynollah/eynollah/train/metrics.py new file mode 100644 index 0000000..c63cc22 --- /dev/null +++ b/eynollah/eynollah/train/metrics.py @@ -0,0 +1,338 @@ +from keras import backend as K +import tensorflow as tf +import numpy as np + +def focal_loss(gamma=2., alpha=4.): + + gamma = float(gamma) + alpha = float(alpha) + + def focal_loss_fixed(y_true, y_pred): + """Focal loss for multi-classification + FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t) + Notice: y_pred is probability after softmax + gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper + d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x) + Focal Loss for Dense Object Detection + https://arxiv.org/abs/1708.02002 + + Arguments: + y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls] + y_pred {tensor} -- model's output, shape of [batch_size, num_cls] + + Keyword Arguments: + gamma {float} -- (default: {2.0}) + alpha {float} -- (default: {4.0}) + + Returns: + [tensor] -- loss. 
+ """ + epsilon = 1.e-9 + y_true = tf.convert_to_tensor(y_true, tf.float32) + y_pred = tf.convert_to_tensor(y_pred, tf.float32) + + model_out = tf.add(y_pred, epsilon) + ce = tf.multiply(y_true, -tf.log(model_out)) + weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma)) + fl = tf.multiply(alpha, tf.multiply(weight, ce)) + reduced_fl = tf.reduce_max(fl, axis=1) + return tf.reduce_mean(reduced_fl) + return focal_loss_fixed + +def weighted_categorical_crossentropy(weights=None): + """ weighted_categorical_crossentropy + + Args: + * weights: crossentropy weights + Returns: + * weighted categorical crossentropy function + """ + + def loss(y_true, y_pred): + labels_floats = tf.cast(y_true, tf.float32) + per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred) + + if weights is not None: + weight_mask = tf.maximum(tf.reduce_max(tf.constant( + np.array(weights, dtype=np.float32)[None, None, None]) + * labels_floats, axis=-1), 1.0) + per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] + return tf.reduce_mean(per_pixel_loss) + return loss +def image_categorical_cross_entropy(y_true, y_pred, weights=None): + """ + :param y_true: tensor of shape (batch_size, height, width) representing the ground truth. + :param y_pred: tensor of shape (batch_size, height, width) representing the prediction. + :return: The mean cross-entropy on softmaxed tensors. + """ + + labels_floats = tf.cast(y_true, tf.float32) + per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred) + + if weights is not None: + weight_mask = tf.maximum( + tf.reduce_max(tf.constant( + np.array(weights, dtype=np.float32)[None, None, None]) + * labels_floats, axis=-1), 1.0) + per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] + + return tf.reduce_mean(per_pixel_loss) +def class_tversky(y_true, y_pred): + smooth = 1.0#1.00 + + y_true = K.permute_dimensions(y_true, (3,1,2,0)) + y_pred = K.permute_dimensions(y_pred, (3,1,2,0)) + + y_true_pos = K.batch_flatten(y_true) + y_pred_pos = K.batch_flatten(y_pred) + true_pos = K.sum(y_true_pos * y_pred_pos, 1) + false_neg = K.sum(y_true_pos * (1-y_pred_pos), 1) + false_pos = K.sum((1-y_true_pos)*y_pred_pos, 1) + alpha = 0.2#0.5 + beta=0.8 + return (true_pos + smooth)/(true_pos + alpha*false_neg + (beta)*false_pos + smooth) + +def focal_tversky_loss(y_true,y_pred): + pt_1 = class_tversky(y_true, y_pred) + gamma =1.3#4./3.0#1.3#4.0/3.00# 0.75 + return K.sum(K.pow((1-pt_1), gamma)) + +def generalized_dice_coeff2(y_true, y_pred): + n_el = 1 + for dim in y_true.shape: + n_el *= int(dim) + n_cl = y_true.shape[-1] + w = K.zeros(shape=(n_cl,)) + w = (K.sum(y_true, axis=(0,1,2)))/(n_el) + w = 1/(w**2+0.000001) + numerator = y_true*y_pred + numerator = w*K.sum(numerator,(0,1,2)) + numerator = K.sum(numerator) + denominator = y_true+y_pred + denominator = w*K.sum(denominator,(0,1,2)) + denominator = K.sum(denominator) + return 2*numerator/denominator +def generalized_dice_coeff(y_true, y_pred): + axes = tuple(range(1, len(y_pred.shape)-1)) + Ncl = y_pred.shape[-1] + w = K.zeros(shape=(Ncl,)) + w = K.sum(y_true, axis=axes) + w = 1/(w**2+0.000001) + # Compute gen dice coef: + numerator = y_true*y_pred + numerator = w*K.sum(numerator,axes) + numerator = K.sum(numerator) + + denominator = y_true+y_pred + denominator = w*K.sum(denominator,axes) + denominator = K.sum(denominator) + + gen_dice_coef = 2*numerator/denominator + + return gen_dice_coef + +def generalized_dice_loss(y_true, y_pred): + return 1 - 
generalized_dice_coeff2(y_true, y_pred) +def soft_dice_loss(y_true, y_pred, epsilon=1e-6): + ''' + Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions. + Assumes the `channels_last` format. + + # Arguments + y_true: b x X x Y( x Z...) x c One hot encoding of ground truth + y_pred: b x X x Y( x Z...) x c Network output, must sum to 1 over c channel (such as after softmax) + epsilon: Used for numerical stability to avoid divide by zero errors + + # References + V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation + https://arxiv.org/abs/1606.04797 + More details on Dice loss formulation + https://mediatum.ub.tum.de/doc/1395260/1395260.pdf (page 72) + + Adapted from https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022 + ''' + + # skip the batch and class axis for calculating Dice score + axes = tuple(range(1, len(y_pred.shape)-1)) + + numerator = 2. * K.sum(y_pred * y_true, axes) + + denominator = K.sum(K.square(y_pred) + K.square(y_true), axes) + return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch + +def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last = True, mean_per_class=False, verbose=False): + """ + Compute mean metrics of two segmentation masks, via Keras. + + IoU(A,B) = |A & B| / (| A U B|) + Dice(A,B) = 2*|A & B| / (|A| + |B|) + + Args: + y_true: true masks, one-hot encoded. + y_pred: predicted masks, either softmax outputs, or one-hot encoded. + metric_name: metric to be computed, either 'iou' or 'dice'. + metric_type: one of 'standard' (default), 'soft', 'naive'. + In the standard version, y_pred is one-hot encoded and the mean + is taken only over classes that are present (in y_true or y_pred). + The 'soft' version of the metrics are computed without one-hot + encoding y_pred. + The 'naive' version return mean metrics where absent classes contribute + to the class mean as 1.0 (instead of being dropped from the mean). + drop_last = True: boolean flag to drop last class (usually reserved + for background class in semantic segmentation) + mean_per_class = False: return mean along batch axis for each class. + verbose = False: print intermediate results such as intersection, union + (as number of pixels). + Returns: + IoU/Dice of y_true and y_pred, as a float, unless mean_per_class == True + in which case it returns the per-class metric, averaged over the batch. 
+ + Inputs are B*W*H*N tensors, with + B = batch size, + W = width, + H = height, + N = number of classes + """ + + flag_soft = (metric_type == 'soft') + flag_naive_mean = (metric_type == 'naive') + + # always assume one or more classes + num_classes = K.shape(y_true)[-1] + + if not flag_soft: + # get one-hot encoded masks from y_pred (true masks should already be one-hot) + y_pred = K.one_hot(K.argmax(y_pred), num_classes) + y_true = K.one_hot(K.argmax(y_true), num_classes) + + # if already one-hot, could have skipped above command + # keras uses float32 instead of float64, would give error down (but numpy arrays or keras.to_categorical gives float64) + y_true = K.cast(y_true, 'float32') + y_pred = K.cast(y_pred, 'float32') + + # intersection and union shapes are batch_size * n_classes (values = area in pixels) + axes = (1,2) # W,H axes of each image + intersection = K.sum(K.abs(y_true * y_pred), axis=axes) + mask_sum = K.sum(K.abs(y_true), axis=axes) + K.sum(K.abs(y_pred), axis=axes) + union = mask_sum - intersection # or, np.logical_or(y_pred, y_true) for one-hot + + smooth = .001 + iou = (intersection + smooth) / (union + smooth) + dice = 2 * (intersection + smooth)/(mask_sum + smooth) + + metric = {'iou': iou, 'dice': dice}[metric_name] + + # define mask to be 0 when no pixels are present in either y_true or y_pred, 1 otherwise + mask = K.cast(K.not_equal(union, 0), 'float32') + + if drop_last: + metric = metric[:,:-1] + mask = mask[:,:-1] + + if verbose: + print('intersection, union') + print(K.eval(intersection), K.eval(union)) + print(K.eval(intersection/union)) + + # return mean metrics: remaining axes are (batch, classes) + if flag_naive_mean: + return K.mean(metric) + + # take mean only over non-absent classes + class_count = K.sum(mask, axis=0) + non_zero = tf.greater(class_count, 0) + non_zero_sum = tf.boolean_mask(K.sum(metric * mask, axis=0), non_zero) + non_zero_count = tf.boolean_mask(class_count, non_zero) + + if verbose: + print('Counts of inputs with class present, metrics for non-absent classes') + print(K.eval(class_count), K.eval(non_zero_sum / non_zero_count)) + + return K.mean(non_zero_sum / non_zero_count) + +def mean_iou(y_true, y_pred, **kwargs): + """ + Compute mean Intersection over Union of two segmentation masks, via Keras. + + Calls metrics_k(y_true, y_pred, metric_name='iou'), see there for allowed kwargs. 
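+
+    Example (a sketch, once a model is built):
+        model.compile(loss='categorical_crossentropy', optimizer='adam',
+                      metrics=[mean_iou])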
+ """ + return seg_metrics(y_true, y_pred, metric_name='iou', **kwargs) +def Mean_IOU(y_true, y_pred): + nb_classes = K.int_shape(y_pred)[-1] + iou = [] + true_pixels = K.argmax(y_true, axis=-1) + pred_pixels = K.argmax(y_pred, axis=-1) + void_labels = K.equal(K.sum(y_true, axis=-1), 0) + for i in range(0, nb_classes): # exclude first label (background) and last label (void) + true_labels = K.equal(true_pixels, i)# & ~void_labels + pred_labels = K.equal(pred_pixels, i)# & ~void_labels + inter = tf.to_int32(true_labels & pred_labels) + union = tf.to_int32(true_labels | pred_labels) + legal_batches = K.sum(tf.to_int32(true_labels), axis=1)>0 + ious = K.sum(inter, axis=1)/K.sum(union, axis=1) + iou.append(K.mean(tf.gather(ious, indices=tf.where(legal_batches)))) # returns average IoU of the same objects + iou = tf.stack(iou) + legal_labels = ~tf.debugging.is_nan(iou) + iou = tf.gather(iou, indices=tf.where(legal_labels)) + return K.mean(iou) + +def iou_vahid(y_true, y_pred): + nb_classes = tf.shape(y_true)[-1]+tf.to_int32(1) + true_pixels = K.argmax(y_true, axis=-1) + pred_pixels = K.argmax(y_pred, axis=-1) + iou = [] + + for i in tf.range(nb_classes): + tp=K.sum( tf.to_int32( K.equal(true_pixels, i) & K.equal(pred_pixels, i) ) ) + fp=K.sum( tf.to_int32( K.not_equal(true_pixels, i) & K.equal(pred_pixels, i) ) ) + fn=K.sum( tf.to_int32( K.equal(true_pixels, i) & K.not_equal(pred_pixels, i) ) ) + iouh=tp/(tp+fp+fn) + iou.append(iouh) + return K.mean(iou) + + +def IoU_metric(Yi,y_predi): + ## mean Intersection over Union + ## Mean IoU = TP/(FN + TP + FP) + y_predi = np.argmax(y_predi, axis=3) + y_testi = np.argmax(Yi, axis=3) + IoUs = [] + Nclass = int(np.max(Yi)) + 1 + for c in range(Nclass): + TP = np.sum( (Yi == c)&(y_predi==c) ) + FP = np.sum( (Yi != c)&(y_predi==c) ) + FN = np.sum( (Yi == c)&(y_predi != c)) + IoU = TP/float(TP + FP + FN) + IoUs.append(IoU) + return K.cast( np.mean(IoUs) ,dtype='float32' ) + + +def IoU_metric_keras(y_true, y_pred): + ## mean Intersection over Union + ## Mean IoU = TP/(FN + TP + FP) + init = tf.global_variables_initializer() + sess = tf.Session() + sess.run(init) + + return IoU_metric(y_true.eval(session=sess), y_pred.eval(session=sess)) + +def jaccard_distance_loss(y_true, y_pred, smooth=100): + """ + Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) + = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|)) + + The jaccard distance loss is usefull for unbalanced datasets. This has been + shifted so it converges on 0 and is smoothed to avoid exploding or disapearing + gradient. 
+ + Ref: https://en.wikipedia.org/wiki/Jaccard_index + + @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96 + @author: wassname + """ + intersection = K.sum(K.abs(y_true * y_pred), axis=-1) + sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1) + jac = (intersection + smooth) / (sum_ - intersection + smooth) + return (1 - jac) * smooth + + diff --git a/eynollah/eynollah/train/models.py b/eynollah/eynollah/train/models.py new file mode 100644 index 0000000..7c806b4 --- /dev/null +++ b/eynollah/eynollah/train/models.py @@ -0,0 +1,317 @@ +from keras.models import * +from keras.layers import * +from keras import layers +from keras.regularizers import l2 + +resnet50_Weights_path='./pretrained_model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' +IMAGE_ORDERING ='channels_last' +MERGE_AXIS=-1 + + +def one_side_pad( x ): + x = ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING)(x) + if IMAGE_ORDERING == 'channels_first': + x = Lambda(lambda x : x[: , : , :-1 , :-1 ] )(x) + elif IMAGE_ORDERING == 'channels_last': + x = Lambda(lambda x : x[: , :-1 , :-1 , : ] )(x) + return x + +def identity_block(input_tensor, kernel_size, filters, stage, block): + """The identity block is the block that has no conv layer at shortcut. + # Arguments + input_tensor: input tensor + kernel_size: defualt 3, the kernel size of middle conv layer at main path + filters: list of integers, the filterss of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + # Returns + Output tensor for the block. + """ + filters1, filters2, filters3 = filters + + if IMAGE_ORDERING == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = Conv2D(filters1, (1, 1) , data_format=IMAGE_ORDERING , name=conv_name_base + '2a')(input_tensor) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) + x = Activation('relu')(x) + + x = Conv2D(filters2, kernel_size , data_format=IMAGE_ORDERING , + padding='same', name=conv_name_base + '2b')(x) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) + x = Activation('relu')(x) + + x = Conv2D(filters3 , (1, 1), data_format=IMAGE_ORDERING , name=conv_name_base + '2c')(x) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) + + x = layers.add([x, input_tensor]) + x = Activation('relu')(x) + return x + + +def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): + """conv_block is the block that has a conv layer at shortcut + # Arguments + input_tensor: input tensor + kernel_size: defualt 3, the kernel size of middle conv layer at main path + filters: list of integers, the filterss of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + # Returns + Output tensor for the block. 
+ Note that from stage 3, the first conv layer at main path is with strides=(2,2) + And the shortcut should have strides=(2,2) as well + """ + filters1, filters2, filters3 = filters + + if IMAGE_ORDERING == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = Conv2D(filters1, (1, 1) , data_format=IMAGE_ORDERING , strides=strides, + name=conv_name_base + '2a')(input_tensor) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) + x = Activation('relu')(x) + + x = Conv2D(filters2, kernel_size , data_format=IMAGE_ORDERING , padding='same', + name=conv_name_base + '2b')(x) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) + x = Activation('relu')(x) + + x = Conv2D(filters3, (1, 1) , data_format=IMAGE_ORDERING , name=conv_name_base + '2c')(x) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) + + shortcut = Conv2D(filters3, (1, 1) , data_format=IMAGE_ORDERING , strides=strides, + name=conv_name_base + '1')(input_tensor) + shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) + + x = layers.add([x, shortcut]) + x = Activation('relu')(x) + return x + + +def resnet50_unet_light(n_classes,input_height=224,input_width=224,weight_decay=1e-6,pretraining=False): + assert input_height%32 == 0 + assert input_width%32 == 0 + + + img_input = Input(shape=(input_height,input_width , 3 )) + + if IMAGE_ORDERING == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + x = ZeroPadding2D((3, 3), data_format=IMAGE_ORDERING)(img_input) + x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, strides=(2, 2),kernel_regularizer=l2(weight_decay), name='conv1')(x) + f1 = x + + x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) + x = Activation('relu')(x) + x = MaxPooling2D((3, 3) , data_format=IMAGE_ORDERING , strides=(2, 2))(x) + + + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') + x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') + f2 = one_side_pad(x ) + + + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') + f3 = x + + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') + f4 = x + + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') + f5 = x + + + if pretraining: + model=Model( img_input , x ).load_weights(resnet50_Weights_path) + + + v512_2048 = Conv2D( 512 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f5 ) + v512_2048 = ( BatchNormalization(axis=bn_axis))(v512_2048) + v512_2048 = Activation('relu')(v512_2048) + + + + v512_1024=Conv2D( 512 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f4 ) + v512_1024 = ( BatchNormalization(axis=bn_axis))(v512_1024) + v512_1024 = 
Activation('relu')(v512_1024) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(v512_2048) + o = ( concatenate([ o ,v512_1024],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) + o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([ o ,f3],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) + o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,f2],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING))(o) + o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay) ) )(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,f1],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) + o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,img_input],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) + o = ( Conv2D( 32 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + + o = Conv2D( n_classes , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( o ) + o = ( BatchNormalization(axis=bn_axis))(o) + o = (Activation('softmax'))(o) + + + model = Model( img_input , o ) + return model + +def resnet50_unet(n_classes,input_height=224,input_width=224,weight_decay=1e-6,pretraining=False): + assert input_height%32 == 0 + assert input_width%32 == 0 + + + img_input = Input(shape=(input_height,input_width , 3 )) + + if IMAGE_ORDERING == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + x = ZeroPadding2D((3, 3), data_format=IMAGE_ORDERING)(img_input) + x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, strides=(2, 2),kernel_regularizer=l2(weight_decay), name='conv1')(x) + f1 = x + + x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) + x = Activation('relu')(x) + x = MaxPooling2D((3, 3) , data_format=IMAGE_ORDERING , strides=(2, 2))(x) + + + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') + x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') + f2 = one_side_pad(x ) + + + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') + f3 = x + + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') + x = 
identity_block(x, 3, [256, 256, 1024], stage=4, block='e') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') + f4 = x + + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') + f5 = x + + if pretraining: + Model( img_input , x ).load_weights(resnet50_Weights_path) + + v1024_2048 = Conv2D( 1024 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f5 ) + v1024_2048 = ( BatchNormalization(axis=bn_axis))(v1024_2048) + v1024_2048 = Activation('relu')(v1024_2048) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(v1024_2048) + o = ( concatenate([ o ,f4],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) + o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([ o ,f3],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) + o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,f2],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING))(o) + o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay) ) )(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,f1],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) + o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,img_input],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) + o = ( Conv2D( 32 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = Conv2D( n_classes , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( o ) + o = ( BatchNormalization(axis=bn_axis))(o) + o = (Activation('softmax'))(o) + + model = Model( img_input , o ) + + + + + return model diff --git a/eynollah/eynollah/train/train.py b/eynollah/eynollah/train/train.py new file mode 100644 index 0000000..0cc5ef3 --- /dev/null +++ b/eynollah/eynollah/train/train.py @@ -0,0 +1,238 @@ +import os +import sys +import tensorflow as tf +from keras.backend.tensorflow_backend import set_session +import keras , warnings +from keras.optimizers import * +from sacred import Experiment +from models import * +from utils import * +from metrics import * +from keras.models import load_model +from tqdm import tqdm + +def configuration(): + keras.backend.clear_session() + tf.reset_default_graph() + warnings.filterwarnings('ignore') + + os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID' + config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True) + + + config.gpu_options.allow_growth = 
True + config.gpu_options.per_process_gpu_memory_fraction=0.95#0.95 + config.gpu_options.visible_device_list="0" + set_session(tf.Session(config=config)) + +def get_dirs_or_files(input_data): + if os.path.isdir(input_data): + image_input, labels_input = os.path.join(input_data, 'images/'), os.path.join(input_data, 'labels/') + # Check if training dir exists + assert os.path.isdir(image_input), "{} is not a directory".format(image_input) + assert os.path.isdir(labels_input), "{} is not a directory".format(labels_input) + return image_input, labels_input + +ex = Experiment() + +@ex.config +def config_params(): + n_classes=None # Number of classes. If your case study is binary case the set it to 2 and otherwise give your number of cases. + n_epochs=1 + input_height=224*1 + input_width=224*1 + weight_decay=1e-6 # Weight decay of l2 regularization of model layers. + n_batch=1 # Number of batches at each iteration. + learning_rate=1e-4 + patches=False # Make patches of image in order to use all information of image. In the case of page + # extraction this should be set to false since model should see all image. + augmentation=False + flip_aug=False # Flip image (augmentation). + blur_aug=False # Blur patches of image (augmentation). + scaling=False # Scaling of patches (augmentation) will be imposed if this set to true. + binarization=False # Otsu thresholding. Used for augmentation in the case of binary case like textline prediction. For multicases should not be applied. + dir_train=None # Directory of training dataset (sub-folders should be named images and labels). + dir_eval=None # Directory of validation dataset (sub-folders should be named images and labels). + dir_output=None # Directory of output where the model should be saved. + pretraining=False # Set true to load pretrained weights of resnet50 encoder. + scaling_bluring=False + scaling_binarization=False + scaling_flip=False + thetha=[10,-10] + blur_k=['blur','guass','median'] # Used in order to blur image. Used for augmentation. + scales= [ 0.5, 2 ] # Scale patches with these scales. Used for augmentation. + flip_index=[0,1,-1] # Flip image. Used for augmentation. + continue_training = False # If + index_start = 0 + dir_of_start_model = '' + is_loss_soft_dice = False + weighted_loss = False + data_is_provided = False + +@ex.automain +def run(n_classes,n_epochs,input_height, + input_width,weight_decay,weighted_loss, + index_start,dir_of_start_model,is_loss_soft_dice, + n_batch,patches,augmentation,flip_aug + ,blur_aug,scaling, binarization, + blur_k,scales,dir_train,data_is_provided, + scaling_bluring,scaling_binarization,rotation, + rotation_not_90,thetha,scaling_flip,continue_training, + flip_index,dir_eval ,dir_output,pretraining,learning_rate): + + + if data_is_provided: + dir_train_flowing=os.path.join(dir_output,'train') + dir_eval_flowing=os.path.join(dir_output,'eval') + + dir_flow_train_imgs=os.path.join(dir_train_flowing,'images') + dir_flow_train_labels=os.path.join(dir_train_flowing,'labels') + + dir_flow_eval_imgs=os.path.join(dir_eval_flowing,'images') + dir_flow_eval_labels=os.path.join(dir_eval_flowing,'labels') + + configuration() + + else: + dir_img,dir_seg=get_dirs_or_files(dir_train) + dir_img_val,dir_seg_val=get_dirs_or_files(dir_eval) + + # make first a directory in output for both training and evaluations in order to flow data from these directories. 
+ dir_train_flowing=os.path.join(dir_output,'train') + dir_eval_flowing=os.path.join(dir_output,'eval') + + dir_flow_train_imgs=os.path.join(dir_train_flowing,'images/') + dir_flow_train_labels=os.path.join(dir_train_flowing,'labels/') + + dir_flow_eval_imgs=os.path.join(dir_eval_flowing,'images/') + dir_flow_eval_labels=os.path.join(dir_eval_flowing,'labels/') + + if os.path.isdir(dir_train_flowing): + os.system('rm -rf '+dir_train_flowing) + os.makedirs(dir_train_flowing) + else: + os.makedirs(dir_train_flowing) + + if os.path.isdir(dir_eval_flowing): + os.system('rm -rf '+dir_eval_flowing) + os.makedirs(dir_eval_flowing) + else: + os.makedirs(dir_eval_flowing) + + + os.mkdir(dir_flow_train_imgs) + os.mkdir(dir_flow_train_labels) + + os.mkdir(dir_flow_eval_imgs) + os.mkdir(dir_flow_eval_labels) + + + #set the gpu configuration + configuration() + + + #writing patches into a sub-folder in order to be flowed from directory. + provide_patches(dir_img,dir_seg,dir_flow_train_imgs, + dir_flow_train_labels, + input_height,input_width,blur_k,blur_aug, + flip_aug,binarization,scaling,scales,flip_index, + scaling_bluring,scaling_binarization,rotation, + rotation_not_90,thetha,scaling_flip, + augmentation=augmentation,patches=patches) + + provide_patches(dir_img_val,dir_seg_val,dir_flow_eval_imgs, + dir_flow_eval_labels, + input_height,input_width,blur_k,blur_aug, + flip_aug,binarization,scaling,scales,flip_index, + scaling_bluring,scaling_binarization,rotation, + rotation_not_90,thetha,scaling_flip, + augmentation=False,patches=patches) + + + + if weighted_loss: + weights=np.zeros(n_classes) + if data_is_provided: + for obj in os.listdir(dir_flow_train_labels): + try: + label_obj=cv2.imread(dir_flow_train_labels+'/'+obj) + label_obj_one_hot=get_one_hot( label_obj,label_obj.shape[0],label_obj.shape[1],n_classes) + weights+=(label_obj_one_hot.sum(axis=0)).sum(axis=0) + except: + pass + else: + + for obj in os.listdir(dir_seg): + try: + label_obj=cv2.imread(dir_seg+'/'+obj) + label_obj_one_hot=get_one_hot( label_obj,label_obj.shape[0],label_obj.shape[1],n_classes) + weights+=(label_obj_one_hot.sum(axis=0)).sum(axis=0) + except: + pass + + + weights=1.00/weights + + weights=weights/float(np.sum(weights)) + weights=weights/float(np.min(weights)) + weights=weights/float(np.sum(weights)) + + + + if continue_training: + if is_loss_soft_dice: + model = load_model (dir_of_start_model, compile = True, custom_objects={'soft_dice_loss': soft_dice_loss}) + if weighted_loss: + model = load_model (dir_of_start_model, compile = True, custom_objects={'loss': weighted_categorical_crossentropy(weights)}) + if not is_loss_soft_dice and not weighted_loss: + model = load_model (dir_of_start_model, compile = True) + else: + #get our model. + index_start = 0 + model = resnet50_unet(n_classes, input_height, input_width,weight_decay,pretraining) + + #if you want to see the model structure just uncomment model summary. 
+ #model.summary() + + + if not is_loss_soft_dice and not weighted_loss: + model.compile(loss='categorical_crossentropy', + optimizer = Adam(lr=learning_rate),metrics=['accuracy']) + if is_loss_soft_dice: + model.compile(loss=soft_dice_loss, + optimizer = Adam(lr=learning_rate),metrics=['accuracy']) + + if weighted_loss: + model.compile(loss=weighted_categorical_crossentropy(weights), + optimizer = Adam(lr=learning_rate),metrics=['accuracy']) + + #generating train and evaluation data + train_gen = data_gen(dir_flow_train_imgs,dir_flow_train_labels, batch_size = n_batch, + input_height=input_height, input_width=input_width,n_classes=n_classes ) + val_gen = data_gen(dir_flow_eval_imgs,dir_flow_eval_labels, batch_size = n_batch, + input_height=input_height, input_width=input_width,n_classes=n_classes ) + + for i in tqdm(range(index_start, n_epochs+index_start)): + model.fit_generator( + train_gen, + steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs))/n_batch)-1, + validation_data=val_gen, + validation_steps=1, + epochs=1) + model.save(dir_output+'/'+'model_'+str(i)+'.h5') + + + #os.system('rm -rf '+dir_train_flowing) + #os.system('rm -rf '+dir_eval_flowing) + + #model.save(dir_output+'/'+'model'+'.h5') + + + + + + + + + + diff --git a/eynollah/eynollah/train/utils.py b/eynollah/eynollah/train/utils.py new file mode 100644 index 0000000..19ab46e --- /dev/null +++ b/eynollah/eynollah/train/utils.py @@ -0,0 +1,497 @@ +import os +import cv2 +import numpy as np +import seaborn as sns +from scipy.ndimage.interpolation import map_coordinates +from scipy.ndimage.filters import gaussian_filter +import random +from tqdm import tqdm +import imutils +import math + + + +def bluring(img_in,kind): + if kind=='guass': + img_blur = cv2.GaussianBlur(img_in,(5,5),0) + elif kind=="median": + img_blur = cv2.medianBlur(img_in,5) + elif kind=='blur': + img_blur=cv2.blur(img_in,(5,5)) + return img_blur + +def elastic_transform(image, alpha, sigma,seedj, random_state=None): + + """Elastic deformation of images as described in [Simard2003]_. + .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for + Convolutional Neural Networks applied to Visual Document Analysis", in + Proc. of the International Conference on Document Analysis and + Recognition, 2003. + """ + if random_state is None: + random_state = np.random.RandomState(seedj) + + shape = image.shape + dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha + dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma, mode="constant", cval=0) * alpha + dz = np.zeros_like(dx) + + x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2])) + indices = np.reshape(y+dy, (-1, 1)), np.reshape(x+dx, (-1, 1)), np.reshape(z, (-1, 1)) + + distored_image = map_coordinates(image, indices, order=1, mode='reflect') + return distored_image.reshape(image.shape) + +def rotation_90(img): + img_rot=np.zeros((img.shape[1],img.shape[0],img.shape[2])) + img_rot[:,:,0]=img[:,:,0].T + img_rot[:,:,1]=img[:,:,1].T + img_rot[:,:,2]=img[:,:,2].T + return img_rot + +def rotatedRectWithMaxArea(w, h, angle): + """ + Given a rectangle of size wxh that has been rotated by 'angle' (in + radians), computes the width and height of the largest possible + axis-aligned rectangle (maximal area) within the rotated rectangle. 
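+
+    Example (approximate): for w=100, h=50 and angle=math.radians(10),
+    the fully constrained case applies and (wr, hr) is about (95.6, 33.9).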
+ """ + if w <= 0 or h <= 0: + return 0,0 + + width_is_longer = w >= h + side_long, side_short = (w,h) if width_is_longer else (h,w) + + # since the solutions for angle, -angle and 180-angle are all the same, + # if suffices to look at the first quadrant and the absolute values of sin,cos: + sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle)) + if side_short <= 2.*sin_a*cos_a*side_long or abs(sin_a-cos_a) < 1e-10: + # half constrained case: two crop corners touch the longer side, + # the other two corners are on the mid-line parallel to the longer line + x = 0.5*side_short + wr,hr = (x/sin_a,x/cos_a) if width_is_longer else (x/cos_a,x/sin_a) + else: + # fully constrained case: crop touches all 4 sides + cos_2a = cos_a*cos_a - sin_a*sin_a + wr,hr = (w*cos_a - h*sin_a)/cos_2a, (h*cos_a - w*sin_a)/cos_2a + + return wr,hr + +def rotate_max_area(image,rotated, rotated_label,angle): + """ image: cv2 image matrix object + angle: in degree + """ + wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], + math.radians(angle)) + h, w, _ = rotated.shape + y1 = h//2 - int(hr/2) + y2 = y1 + int(hr) + x1 = w//2 - int(wr/2) + x2 = x1 + int(wr) + return rotated[y1:y2, x1:x2],rotated_label[y1:y2, x1:x2] +def rotation_not_90_func(img,label,thetha): + rotated=imutils.rotate(img,thetha) + rotated_label=imutils.rotate(label,thetha) + return rotate_max_area(img, rotated,rotated_label,thetha) + +def color_images(seg, n_classes): + ann_u=range(n_classes) + if len(np.shape(seg))==3: + seg=seg[:,:,0] + + seg_img=np.zeros((np.shape(seg)[0],np.shape(seg)[1],3)).astype(float) + colors=sns.color_palette("hls", n_classes) + + for c in ann_u: + c=int(c) + segl=(seg==c) + seg_img[:,:,0]+=segl*(colors[c][0]) + seg_img[:,:,1]+=segl*(colors[c][1]) + seg_img[:,:,2]+=segl*(colors[c][2]) + return seg_img + + +def resize_image(seg_in,input_height,input_width): + return cv2.resize(seg_in,(input_width,input_height),interpolation=cv2.INTER_NEAREST) +def get_one_hot(seg,input_height,input_width,n_classes): + seg=seg[:,:,0] + seg_f=np.zeros((input_height, input_width,n_classes)) + for j in range(n_classes): + seg_f[:,:,j]=(seg==j).astype(int) + return seg_f + + +def IoU(Yi,y_predi): + ## mean Intersection over Union + ## Mean IoU = TP/(FN + TP + FP) + + IoUs = [] + classes_true=np.unique(Yi) + for c in classes_true: + TP = np.sum( (Yi == c)&(y_predi==c) ) + FP = np.sum( (Yi != c)&(y_predi==c) ) + FN = np.sum( (Yi == c)&(y_predi != c)) + IoU = TP/float(TP + FP + FN) + print("class {:02.0f}: #TP={:6.0f}, #FP={:6.0f}, #FN={:5.0f}, IoU={:4.3f}".format(c,TP,FP,FN,IoU)) + IoUs.append(IoU) + mIoU = np.mean(IoUs) + print("_________________") + print("Mean IoU: {:4.3f}".format(mIoU)) + return mIoU +def data_gen(img_folder, mask_folder, batch_size,input_height, input_width,n_classes): + c = 0 + n = [f for f in os.listdir(img_folder) if not f.startswith('.')]# os.listdir(img_folder) #List of training images + random.shuffle(n) + while True: + img = np.zeros((batch_size, input_height, input_width, 3)).astype('float') + mask = np.zeros((batch_size, input_height, input_width, n_classes)).astype('float') + + for i in range(c, c+batch_size): #initially from 0 to 16, c = 0. + #print(img_folder+'/'+n[i]) + + try: + filename=n[i].split('.')[0] + + train_img = cv2.imread(img_folder+'/'+n[i])/255. + train_img = cv2.resize(train_img, (input_width, input_height),interpolation=cv2.INTER_NEAREST)# Read an image from folder and resize + + img[i-c] = train_img #add to array - img[0], img[1], and so on. 
+                train_mask = cv2.imread(mask_folder+'/'+filename+'.png')
+                #print(mask_folder+'/'+filename+'.png')
+                #print(train_mask.shape)
+                train_mask = get_one_hot( resize_image(train_mask,input_height,input_width),input_height,input_width,n_classes)
+                #train_mask = train_mask.reshape(224, 224, 1) # Add extra dimension for parity with train_img size [512 * 512 * 3]
+
+                mask[i-c] = train_mask
+            except:
+                # if an image or its label cannot be read, fall back to a blank sample
+                img[i-c] = np.ones((input_height, input_width, 3)).astype('float')
+                mask[i-c] = np.zeros((input_height, input_width, n_classes)).astype('float')
+
+
+
+        c+=batch_size
+        if(c+batch_size>=len(os.listdir(img_folder))):
+            c=0
+            random.shuffle(n)
+        yield img, mask
+
+def otsu_copy(img):
+    # Otsu-binarize; the first channel's threshold result is copied to all three output channels
+    img_r=np.zeros(img.shape)
+    img1=img[:,:,0]
+    img2=img[:,:,1]
+    img3=img[:,:,2]
+    _, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
+    _, threshold2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
+    _, threshold3 = cv2.threshold(img3, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
+    img_r[:,:,0]=threshold1
+    img_r[:,:,1]=threshold1
+    img_r[:,:,2]=threshold1
+    return img_r
+def get_patches(dir_img_f,dir_seg_f,img,label,height,width,indexer):
+
+    if img.shape[0]<height or img.shape[1]<width:
+        img,label=do_padding(img,label,height,width)
+
+    img_h=img.shape[0]
+    img_w=img.shape[1]
+
+    nxf=img_w/float(width)
+    nyf=img_h/float(height)
+
+    if nxf>int(nxf):
+        nxf=int(nxf)+1
+    if nyf>int(nyf):
+        nyf=int(nyf)+1
+
+    nxf=int(nxf)
+    nyf=int(nyf)
+
+    for i in range(nxf):
+        for j in range(nyf):
+            index_x_d=i*width
+            index_x_u=(i+1)*width
+
+            index_y_d=j*height
+            index_y_u=(j+1)*height
+
+            # patches that exceed the right/bottom edge are shifted back inside the image
+            if index_x_u>img_w:
+                index_x_u=img_w
+                index_x_d=img_w-width
+            if index_y_u>img_h:
+                index_y_u=img_h
+                index_y_d=img_h-height
+
+
+            img_patch=img[index_y_d:index_y_u,index_x_d:index_x_u,:]
+            label_patch=label[index_y_d:index_y_u,index_x_d:index_x_u,:]
+
+            cv2.imwrite(dir_img_f+'/img_'+str(indexer)+'.png', img_patch )
+            cv2.imwrite(dir_seg_f+'/img_'+str(indexer)+'.png' , label_patch )
+            indexer+=1
+
+    return indexer
+
+def do_padding(img,label,height,width):
+
+    height_new=img.shape[0]
+    width_new=img.shape[1]
+
+    h_start=0
+    w_start=0
+
+    if img.shape[0]<height:
+        h_start=int(abs(height-img.shape[0])/2.)
+        height_new=height
+
+    if img.shape[1]<width:
+        w_start=int(abs(width-img.shape[1])/2.)
+        width_new=width
+
+    # pad the image with white pixels and the label with background (zeros), centering the originals
+    img_new=np.ones((height_new,width_new,img.shape[2])).astype(float)*255
+    label_new=np.zeros((height_new,width_new,label.shape[2])).astype(float)
+
+    img_new[h_start:h_start+img.shape[0],w_start:w_start+img.shape[1],:]=np.copy(img[:,:,:])
+    label_new[h_start:h_start+img.shape[0],w_start:w_start+img.shape[1],:]=np.copy(label[:,:,:])
+
+    return img_new,label_new
+
+def get_patches_num_scale(dir_img_f,dir_seg_f,img,label,height,width,indexer,n_patches,scaler):
+
+    if img.shape[0]<height or img.shape[1]<width:
+        img,label=do_padding(img,label,height,width)
+
+    img_h=img.shape[0]
+    img_w=img.shape[1]
+
+    height_scale=int(height*scaler)
+    width_scale=int(width*scaler)
+
+    nxf=img_w/float(width_scale)
+    nyf=img_h/float(height_scale)
+
+    if nxf>int(nxf):
+        nxf=int(nxf)+1
+    if nyf>int(nyf):
+        nyf=int(nyf)+1
+
+    nxf=int(nxf)
+    nyf=int(nyf)
+
+    for i in range(nxf):
+        for j in range(nyf):
+            index_x_d=i*width_scale
+            index_x_u=(i+1)*width_scale
+
+            index_y_d=j*height_scale
+            index_y_u=(j+1)*height_scale
+
+            if index_x_u>img_w:
+                index_x_u=img_w
+                index_x_d=img_w-width_scale
+            if index_y_u>img_h:
+                index_y_u=img_h
+                index_y_d=img_h-height_scale
+
+
+            img_patch=img[index_y_d:index_y_u,index_x_d:index_x_u,:]
+            label_patch=label[index_y_d:index_y_u,index_x_d:index_x_u,:]
+
+            img_patch=resize_image(img_patch,height,width)
+            label_patch=resize_image(label_patch,height,width)
+
+            cv2.imwrite(dir_img_f+'/img_'+str(indexer)+'.png', img_patch )
+            cv2.imwrite(dir_seg_f+'/img_'+str(indexer)+'.png' , label_patch )
+            indexer+=1
+
+    return indexer
+
+def get_patches_num_scale_new(dir_img_f,dir_seg_f,img,label,height,width,indexer,scaler):
+    img=resize_image(img,int(img.shape[0]*scaler),int(img.shape[1]*scaler))
+    label=resize_image(label,int(label.shape[0]*scaler),int(label.shape[1]*scaler))
+
+    if img.shape[0]<height or img.shape[1]<width:
+        img,label=do_padding(img,label,height,width)
+
+    img_h=img.shape[0]
+    img_w=img.shape[1]
+
+    height_scale=int(height*1)
+    width_scale=int(width*1)
+
+    nxf=img_w/float(width_scale)
+    nyf=img_h/float(height_scale)
+
+    if nxf>int(nxf):
+        nxf=int(nxf)+1
+    if nyf>int(nyf):
+        nyf=int(nyf)+1
+
+    nxf=int(nxf)
+    nyf=int(nyf)
+
+    for i in range(nxf):
+        for j in range(nyf):
+            index_x_d=i*width_scale
+            index_x_u=(i+1)*width_scale
+
+            index_y_d=j*height_scale
+            index_y_u=(j+1)*height_scale
+
+            if index_x_u>img_w:
+                index_x_u=img_w
+                index_x_d=img_w-width_scale
+            if index_y_u>img_h:
+                index_y_u=img_h
+                index_y_d=img_h-height_scale
+
+
+            img_patch=img[index_y_d:index_y_u,index_x_d:index_x_u,:]
+            label_patch=label[index_y_d:index_y_u,index_x_d:index_x_u,:]
+
+            #img_patch=resize_image(img_patch,height,width)
+            #label_patch=resize_image(label_patch,height,width)
+
+            cv2.imwrite(dir_img_f+'/img_'+str(indexer)+'.png', img_patch )
+            cv2.imwrite(dir_seg_f+'/img_'+str(indexer)+'.png' , label_patch )
+            indexer+=1
+
+    return indexer
+
+
+def provide_patches(dir_img,dir_seg,dir_flow_train_imgs,
+                    dir_flow_train_labels,
+                    input_height,input_width,blur_k,blur_aug,
+                    flip_aug,binarization,scaling,scales,flip_index,
+                    scaling_bluring,scaling_binarization,rotation,
+                    rotation_not_90,thetha,scaling_flip,
+                    augmentation=False,patches=False):
+
+    imgs_cv_train=np.array(os.listdir(dir_img))
+    segs_cv_train=np.array(os.listdir(dir_seg))
+
+    indexer=0
+    for im, seg_i in tqdm(zip(imgs_cv_train,segs_cv_train)):
+        img_name=im.split('.')[0]
+        if not patches:
+            cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', resize_image(cv2.imread(dir_img+'/'+im),input_height,input_width ) )
+            cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' , resize_image(cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width ) )
+            indexer+=1
+
+            if augmentation:
+                if flip_aug:
+                    for f_i in flip_index:
+                        cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png',
+                                    resize_image(cv2.flip(cv2.imread(dir_img+'/'+im),f_i),input_height,input_width) )
+
+                        cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' ,
+                                    resize_image(cv2.flip(cv2.imread(dir_seg+'/'+img_name+'.png'),f_i),input_height,input_width) )
+                        indexer+=1
+
+                if blur_aug:
+                    for blur_i in blur_k:
+                        cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png',
+                                    (resize_image(bluring(cv2.imread(dir_img+'/'+im),blur_i),input_height,input_width) ) )
+
+                        cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' ,
+                                    resize_image(cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width) )
+                        indexer+=1
+
+
+                if binarization:
+                    cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png',
+                                resize_image(otsu_copy( cv2.imread(dir_img+'/'+im)),input_height,input_width ))
+
+                    cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png',
+                                resize_image( cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width ))
+                    indexer+=1
+
+
+
+
+        if patches:
+
+            indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels,
+                                cv2.imread(dir_img+'/'+im),cv2.imread(dir_seg+'/'+img_name+'.png'),
+                                input_height,input_width,indexer=indexer)
+
+            if augmentation:
+
+                if rotation:
+
+                    indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels,
+                                        rotation_90( cv2.imread(dir_img+'/'+im) ),
+                                        rotation_90( cv2.imread(dir_seg+'/'+img_name+'.png') ),
+                                        input_height,input_width,indexer=indexer)
+
+                if rotation_not_90:
+
+                    for thetha_i in thetha:
+                        img_max_rotated,label_max_rotated=rotation_not_90_func(cv2.imread(dir_img+'/'+im),cv2.imread(dir_seg+'/'+img_name+'.png'),thetha_i)
+                        indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels,
+                                            img_max_rotated,
+                                            label_max_rotated,
+                                            input_height,input_width,indexer=indexer)
+                if flip_aug:
+                    for f_i in flip_index:
+                        indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels,
+                                            cv2.flip( cv2.imread(dir_img+'/'+im) , f_i),
+                                            cv2.flip( cv2.imread(dir_seg+'/'+img_name+'.png') ,f_i),
+                                            input_height,input_width,indexer=indexer)
+                if blur_aug:
+                    for blur_i in blur_k:
+
+                        indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels,
+                                            bluring( cv2.imread(dir_img+'/'+im) , blur_i),
+                                            cv2.imread(dir_seg+'/'+img_name+'.png'),
+                                            input_height,input_width,indexer=indexer)
+
+
+                if scaling:
+                    for sc_ind in scales:
+                        indexer=get_patches_num_scale_new(dir_flow_train_imgs,dir_flow_train_labels,
+                                                          cv2.imread(dir_img+'/'+im) ,
+                                                          cv2.imread(dir_seg+'/'+img_name+'.png'),
+                                                          input_height,input_width,indexer=indexer,scaler=sc_ind)
+                if binarization:
+                    indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels,
+                                        otsu_copy( cv2.imread(dir_img+'/'+im)),
+                                        cv2.imread(dir_seg+'/'+img_name+'.png'),
+                                        input_height,input_width,indexer=indexer)
+
+
+
+                if scaling_bluring:
+                    for sc_ind in scales:
+                        for blur_i in blur_k:
+                            indexer=get_patches_num_scale_new(dir_flow_train_imgs,dir_flow_train_labels,
+                                                              bluring( cv2.imread(dir_img+'/'+im) , blur_i) ,
+                                                              cv2.imread(dir_seg+'/'+img_name+'.png') ,
+                                                              input_height,input_width,indexer=indexer,scaler=sc_ind)
+
+                if scaling_binarization:
+                    for sc_ind in scales:
+                        indexer=get_patches_num_scale_new(dir_flow_train_imgs,dir_flow_train_labels,
+                                                          otsu_copy( cv2.imread(dir_img+'/'+im)) ,
+                                                          cv2.imread(dir_seg+'/'+img_name+'.png'),
+                                                          input_height,input_width,indexer=indexer,scaler=sc_ind)
+
+                if scaling_flip:
+                    for sc_ind in scales:
+                        for f_i in flip_index:
+                            indexer=get_patches_num_scale_new(dir_flow_train_imgs,dir_flow_train_labels,
+                                                              cv2.flip( cv2.imread(dir_img+'/'+im) , f_i) ,
+                                                              cv2.flip(cv2.imread(dir_seg+'/'+img_name+'.png') ,f_i) ,
+                                                              input_height,input_width,indexer=indexer,scaler=sc_ind)
+
+
+
+
+

From 3f4c30a47acaa726a499af791731415911791a0e Mon Sep 17 00:00:00 2001
From: cneud <952378+cneud@users.noreply.github.com>
Date: Fri, 23 Feb 2024 20:26:23 +0100
Subject: [PATCH 03/34] fix ocrd-tool.json path

---
 ocrd-tool.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ocrd-tool.json b/ocrd-tool.json
index 5c48493..5e513ae 120000
--- a/ocrd-tool.json
+++ b/ocrd-tool.json
@@ -1 +1 @@
-qurator/eynollah/ocrd-tool.json
\ No newline at end of file
+eynollah/eynollah/ocrd-tool.json
\ No newline at end of file

From 63946ad40339a545bcf59d2fba7f59babe524da6 Mon Sep 17 00:00:00 2001
From: cneud <952378+cneud@users.noreply.github.com>
Date: Fri, 23 Feb 2024 20:37:40 +0100
Subject: [PATCH 04/34] add pagexml2img.py to 'train'

---
 eynollah/eynollah/train/pagexml2img.py | 248 +++++++++++++++++++++++++
 1 file changed, 248 insertions(+)
 create mode 100644 eynollah/eynollah/train/pagexml2img.py

diff --git a/eynollah/eynollah/train/pagexml2img.py b/eynollah/eynollah/train/pagexml2img.py
new file mode 100644
index 0000000..c489315
--- /dev/null
+++ b/eynollah/eynollah/train/pagexml2img.py
@@ -0,0 +1,248 @@
+#! /usr/bin/env python3
+
+__version__= '1.0'
+
+import argparse
+import sys
+import os
+import numpy as np
+import warnings
+import xml.etree.ElementTree as ET
+from tqdm import tqdm
+import cv2
+
+with warnings.catch_warnings():
+    warnings.simplefilter("ignore")
+
+__doc__=\
+"""
+Tool to extract 2D or 3D RGB images from PAGE-XML data. In the former case, the output is a
+single 2D image array in which each class is filled with a distinct pixel value. In the case
+of a 3D RGB image, each class is encoded with an RGB value, and alongside the images a text
+file of classes is also produced. This classes.txt file is required by the dhSegment tool.
+"""
+
+class pagexml2img:
+    def __init__(self,dir_in, out_dir,output_type):
+        self.dir=dir_in
+        self.output_dir=out_dir
+        self.output_type=output_type
+
+    def get_content_of_dir(self):
+        """
+        List all ground-truth PAGE-XML files. All files must be in XML format.
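+        Only the suffix after the last dot is checked, so e.g. 'page.xml' is
+        kept while 'page.xml.bak' is skipped.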
+ """ + + + gt_all=os.listdir(self.dir) + self.gt_list=[file for file in gt_all if file.split('.')[ len(file.split('.'))-1 ]=='xml' ] + + def get_images_of_ground_truth(self): + """ + Reading the page xml files and write the ground truth images into given output directory. + """ + + if self.output_type=='3d' or self.output_type=='3D': + classes=np.array([ [0,0,0, 1, 0, 0, 0, 0], + [255,0,0, 0, 1, 0, 0, 0], + [0,255,0, 0, 0, 1, 0, 0], + [0,0,255, 0, 0, 0, 1, 0], + [0,255,255, 0, 0, 0, 0, 1] ]) + + + + + for index in tqdm(range(len(self.gt_list))): + try: + tree1 = ET.parse(self.dir+'/'+self.gt_list[index]) + root1=tree1.getroot() + alltags=[elem.tag for elem in root1.iter()] + link=alltags[0].split('}')[0]+'}' + + region_tags=np.unique([x for x in alltags if x.endswith('Region')]) + + for jj in root1.iter(link+'Page'): + y_len=int(jj.attrib['imageHeight']) + x_len=int(jj.attrib['imageWidth']) + + co_text=[] + co_sep=[] + co_img=[] + co_table=[] + + for tag in region_tags: + if tag.endswith('}TextRegion') or tag.endswith('}Textregion') or tag.endswith('}textRegion') or tag.endswith('}textregion'): + + for nn in root1.iter(tag): + for co_it in nn.iter(link+'Coords'): + if bool(co_it.attrib)==False: + c_t_in=[] + for ll in nn.iter(link+'Point'): + c_t_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + co_text.append(np.array(c_t_in)) + print(co_text) + elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): + p_h=co_it.attrib['points'].split(' ') + co_text.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + + elif tag.endswith('}ImageRegion') or tag.endswith('}Imageregion') or tag.endswith('}imageRegion') or tag.endswith('}imageregion'): + for nn in root1.iter(tag): + for co_it in nn.iter(link+'Coords'): + if bool(co_it.attrib)==False: + c_i_in=[] + for ll in nn.iter(link+'Point'): + c_i_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + co_img.append(np.array(c_i_in)) + elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): + p_h=co_it.attrib['points'].split(' ') + co_img.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif tag.endswith('}SeparatorRegion') or tag.endswith('}Separatorregion') or tag.endswith('}separatorRegion') or tag.endswith('}separatorregion'): + for nn in root1.iter(tag): + for co_it in nn.iter(link+'Coords'): + if bool(co_it.attrib)==False: + c_s_in=[] + for ll in nn.iter(link+'Point'): + c_s_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + co_sep.append(np.array(c_s_in)) + + elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): + p_h=co_it.attrib['points'].split(' ') + co_sep.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif tag.endswith('}TableRegion') or tag.endswith('}tableRegion') or tag.endswith('}Tableregion') or tag.endswith('}tableregion'): + for nn in root1.iter(tag): + for co_it in nn.iter(link+'Coords'): + if bool(co_it.attrib)==False: + c_ta_in=[] + for ll in nn.iter(link+'Point'): + c_ta_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + co_table.append(np.array(c_ta_in)) + + elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): + p_h=co_it.attrib['points'].split(' ') + co_table.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + else: + pass + + img = np.zeros( (y_len,x_len,3) ) + img_poly=cv2.fillPoly(img, pts =co_text, color=(255,0,0)) + 
img_poly=cv2.fillPoly(img, pts =co_img, color=(0,255,0)) + img_poly=cv2.fillPoly(img, pts =co_sep, color=(0,0,255)) + img_poly=cv2.fillPoly(img, pts =co_table, color=(0,255,255)) + + try: + cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) + except: + cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) + except: + pass + np.savetxt(self.output_dir+'/../classes.txt',classes) + + if self.output_type=='2d' or self.output_type=='2D': + for index in tqdm(range(len(self.gt_list))): + try: + tree1 = ET.parse(self.dir+'/'+self.gt_list[index]) + root1=tree1.getroot() + alltags=[elem.tag for elem in root1.iter()] + link=alltags[0].split('}')[0]+'}' + + region_tags=np.unique([x for x in alltags if x.endswith('Region')]) + + for jj in root1.iter(link+'Page'): + y_len=int(jj.attrib['imageHeight']) + x_len=int(jj.attrib['imageWidth']) + + co_text=[] + co_sep=[] + co_img=[] + co_table=[] + + for tag in region_tags: + if tag.endswith('}TextRegion') or tag.endswith('}Textregion') or tag.endswith('}textRegion') or tag.endswith('}textregion'): + + for nn in root1.iter(tag): + for co_it in nn.iter(link+'Coords'): + if bool(co_it.attrib)==False: + c_t_in=[] + for ll in nn.iter(link+'Point'): + c_t_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + co_text.append(np.array(c_t_in)) + print(co_text) + elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): + p_h=co_it.attrib['points'].split(' ') + co_text.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + + elif tag.endswith('}ImageRegion') or tag.endswith('}Imageregion') or tag.endswith('}imageRegion') or tag.endswith('}imageregion'): + for nn in root1.iter(tag): + for co_it in nn.iter(link+'Coords'): + if bool(co_it.attrib)==False: + c_i_in=[] + for ll in nn.iter(link+'Point'): + c_i_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + co_img.append(np.array(c_i_in)) + elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): + p_h=co_it.attrib['points'].split(' ') + co_img.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif tag.endswith('}SeparatorRegion') or tag.endswith('}Separatorregion') or tag.endswith('}separatorRegion') or tag.endswith('}separatorregion'): + for nn in root1.iter(tag): + for co_it in nn.iter(link+'Coords'): + if bool(co_it.attrib)==False: + c_s_in=[] + for ll in nn.iter(link+'Point'): + c_s_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + co_sep.append(np.array(c_s_in)) + + elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): + p_h=co_it.attrib['points'].split(' ') + co_sep.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif tag.endswith('}TableRegion') or tag.endswith('}tableRegion') or tag.endswith('}Tableregion') or tag.endswith('}tableregion'): + for nn in root1.iter(tag): + for co_it in nn.iter(link+'Coords'): + if bool(co_it.attrib)==False: + c_ta_in=[] + for ll in nn.iter(link+'Point'): + c_ta_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + co_table.append(np.array(c_ta_in)) + + elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): + p_h=co_it.attrib['points'].split(' ') + co_table.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + else: + pass + + img = np.zeros( (y_len,x_len) ) + img_poly=cv2.fillPoly(img, pts =co_text, 
color=(1,1,1))
+                        img_poly=cv2.fillPoly(img, pts =co_img, color=(2,2,2))
+                        img_poly=cv2.fillPoly(img, pts =co_sep, color=(3,3,3))
+                        img_poly=cv2.fillPoly(img, pts =co_table, color=(4,4,4))
+                        try:
+                            cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly )
+                        except:
+                            cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly )
+            except:
+                pass
+    def run(self):
+        self.get_content_of_dir()
+        self.get_images_of_ground_truth()
+def main():
+    parser=argparse.ArgumentParser()
+
+    parser.add_argument('-dir_in','--dir_in', dest='inp1', default=None, help='directory of PAGE-XML files')
+    parser.add_argument('-dir_out','--dir_out', dest='inp2', default=None, help='directory where the ground truth images will be written')
+    parser.add_argument('-type','--type', dest='inp3', default=None, help='output format: pass "2d" for a single-channel class-map image, or "3d" for an RGB-encoded image; with "3d", a classes.txt file is additionally saved one directory above the output directory. A "2d" result read back from disk may still show three channels, but one channel carries the full information since all channels have the same values.')
+    options=parser.parse_args()
+
+    possibles=globals()
+    possibles.update(locals())
+    x=pagexml2img(options.inp1,options.inp2,options.inp3)
+    x.run()
+if __name__=="__main__":
+    main()
+
+
+

From 523a10bcfd6d2a4192a8a5110e0b327b8fcb1359 Mon Sep 17 00:00:00 2001
From: cneud <952378+cneud@users.noreply.github.com>
Date: Fri, 23 Feb 2024 20:48:07 +0100
Subject: [PATCH 05/34] Update README.md

---
 README.md | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index b095edb..d9b92c8 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Eynollah
-> Document Layout Analysis (segmentation) using pre-trained models and heuristics
+> Document Layout Analysis with Deep Learning and Heuristics
 
 [![PyPI Version](https://img.shields.io/pypi/v/eynollah)](https://pypi.org/project/eynollah/)
 [![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=shield)](https://circleci.com/gh/qurator-spk/eynollah)
@@ -19,12 +19,11 @@
 * [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface
 
 ## Installation
-Python versions `3.8-3.11` with Tensorflow versions >=`2.12` on Linux are currently supported. Unfortunately we can not currently support Windows or MacOS.
-Windows users may be able to successfully run the tool through [WSL](https://learn.microsoft.com/en-us/windows/wsl/).
+Python versions `3.8-3.11` with Tensorflow versions >=`2.12` on Linux are currently supported. While we cannot provide support for Windows or macOS, Windows users may be able to install and run the tool through Linux in [WSL](https://learn.microsoft.com/en-us/windows/wsl/).
 
 For (limited) GPU support the CUDA toolkit needs to be installed.
 
-You can either install via
+You can either install from PyPI via
 
 ```
 pip install eynollah
@@ -37,12 +36,10 @@ git clone git@github.com:qurator-spk/eynollah.git
 cd eynollah; pip install -e .
 ```
 
-Alternatively, you can run `make install` or `make install-dev` for editable installation.
+Alternatively, run `make install` or `make install-dev` for editable installation.
 
 ## Models
-Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/).
-
-In case you want to train your own model to use with Eynollah, have a look at [sbb_pixelwise_segmentation](https://github.com/qurator-spk/sbb_pixelwise_segmentation).
+Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/). In case you want to train your own model with Eynollah, have a look at [`train`](https://github.com/qurator-spk/eynollah/tree/main/eynollah/eynollah/train). ## Usage The command-line interface can be called like this: From 713b90e0846af7f0994ff9205638cf2cbc27cb8f Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 23 Feb 2024 22:13:02 +0100 Subject: [PATCH 06/34] code formatting --- eynollah/eynollah/eynollah.py | 2170 +++++++++++++++++++-------------- 1 file changed, 1243 insertions(+), 927 deletions(-) diff --git a/eynollah/eynollah/eynollah.py b/eynollah/eynollah/eynollah.py index 49422fa..f3fda56 100644 --- a/eynollah/eynollah/eynollah.py +++ b/eynollah/eynollah/eynollah.py @@ -17,12 +17,14 @@ import gc from ocrd_utils import getLogger import cv2 import numpy as np + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" stderr = sys.stderr sys.stderr = open(os.devnull, "w") import tensorflow as tf from tensorflow.python.keras import backend as K from tensorflow.keras.models import load_model + sys.stderr = stderr tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") @@ -79,14 +81,14 @@ from .plot import EynollahPlotter from .writer import EynollahXmlWriter SLOPE_THRESHOLD = 0.13 -RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: +RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 # 98.45: DPI_THRESHOLD = 298 MAX_SLOPE = 999 KERNEL = np.ones((5, 5), np.uint8) projection_dim = 64 patch_size = 1 -num_patches =21*21#14*14#28*28#14*14#28*28 +num_patches = 21 * 21 # 14*14#28*28#14*14#28*28 class Patches(layers.Layer): @@ -106,15 +108,15 @@ class Patches(layers.Layer): patch_dims = patches.shape[-1] patches = tf.reshape(patches, [batch_size, -1, patch_dims]) return patches - def get_config(self): + def get_config(self): config = super().get_config().copy() config.update({ 'patch_size': self.patch_size, }) return config - - + + class PatchEncoder(layers.Layer): def __init__(self, **kwargs): super(PatchEncoder, self).__init__() @@ -128,8 +130,8 @@ class PatchEncoder(layers.Layer): positions = tf.range(start=0, limit=self.num_patches, delta=1) encoded = self.projection(patch) + self.position_embedding(positions) return encoded - def get_config(self): + def get_config(self): config = super().get_config().copy() config.update({ 'num_patches': self.num_patches, @@ -138,35 +140,36 @@ class PatchEncoder(layers.Layer): }) return config + class Eynollah: def __init__( - self, - dir_models, - image_filename=None, - image_pil=None, - image_filename_stem=None, - dir_out=None, - dir_in=None, - dir_of_cropped_images=None, - dir_of_layout=None, - dir_of_deskewed=None, - dir_of_all=None, - dir_save_page=None, - enable_plotting=False, - allow_enhancement=False, - curved_line=False, - textline_light=False, - full_layout=False, - tables=False, - right2left=False, - input_binary=False, - allow_scaling=False, - headers_off=False, - light_version=False, - ignore_page_extraction=False, - override_dpi=None, - logger=None, - pcgts=None, + self, + dir_models, + image_filename=None, + image_pil=None, + image_filename_stem=None, + dir_out=None, + dir_in=None, + dir_of_cropped_images=None, + dir_of_layout=None, + dir_of_deskewed=None, + dir_of_all=None, + dir_save_page=None, + enable_plotting=False, + allow_enhancement=False, + curved_line=False, + textline_light=False, + full_layout=False, + tables=False, + right2left=False, + input_binary=False, + allow_scaling=False, + headers_off=False, + 
light_version=False, + ignore_page_extraction=False, + override_dpi=None, + logger=None, + pcgts=None, ): if not dir_in: if image_pil: @@ -181,9 +184,9 @@ class Eynollah: self.dir_of_all = dir_of_all self.dir_save_page = dir_save_page self.dir_of_deskewed = dir_of_deskewed - self.dir_of_deskewed = dir_of_deskewed - self.dir_of_cropped_images=dir_of_cropped_images - self.dir_of_layout=dir_of_layout + self.dir_of_deskewed = dir_of_deskewed + self.dir_of_cropped_images = dir_of_cropped_images + self.dir_of_layout = dir_of_layout self.enable_plotting = enable_plotting self.allow_enhancement = allow_enhancement self.curved_line = curved_line @@ -210,7 +213,7 @@ class Eynollah: dir_out=self.dir_out, image_filename=self.image_filename, curved_line=self.curved_line, - textline_light = self.textline_light, + textline_light=self.textline_light, pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models @@ -230,15 +233,15 @@ class Eynollah: else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" self.model_tables = dir_models + "/eynollah-tables_20210319" - + self.models = {} - + if dir_in and light_version: config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True session = tf.compat.v1.Session(config=config) set_session(session) - + self.model_page = self.our_load_model(self.model_page_dir) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_bin = self.our_load_model(self.model_dir_of_binarization) @@ -246,15 +249,15 @@ class Eynollah: self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) - - self.ls_imgs = os.listdir(self.dir_in) - + + self.ls_imgs = os.listdir(self.dir_in) + if dir_in and not light_version: config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True session = tf.compat.v1.Session(config=config) set_session(session) - + self.model_page = self.our_load_model(self.model_page_dir) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_bin = self.our_load_model(self.model_dir_of_binarization) @@ -264,10 +267,9 @@ class Eynollah: self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) - - self.ls_imgs = os.listdir(self.dir_in) - - + + self.ls_imgs = os.listdir(self.dir_in) + def _cache_images(self, image_filename=None, image_pil=None): ret = {} if image_filename: @@ -277,13 +279,14 @@ class Eynollah: ret['img'] = pil2cv(image_pil) self.dpi = check_dpi(image_pil) ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) - for prefix in ('', '_grayscale'): + for prefix in ('', '_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret + def reset_file_name_dir(self, image_filename): self._imgs = self._cache_images(image_filename=image_filename) self.image_filename = image_filename - + self.plotter = None if not self.enable_plotting else EynollahPlotter( dir_out=self.dir_out, dir_of_all=self.dir_of_all, @@ -292,13 +295,14 @@ class Eynollah: dir_of_cropped_images=self.dir_of_cropped_images, dir_of_layout=self.dir_of_layout, image_filename_stem=Path(Path(image_filename).name).stem) - + self.writer = EynollahXmlWriter( dir_out=self.dir_out, 
image_filename=self.image_filename, curved_line=self.curved_line, - textline_light = self.textline_light, + textline_light=self.textline_light, pcgts=self.pcgts) + def imread(self, grayscale=False, uint8=True): key = 'img' if grayscale: @@ -306,11 +310,10 @@ class Eynollah: if uint8: key += '_uint8' return self._imgs[key].copy() - + def isNaN(self, num): return num != num - def predict_enhancement(self, img): self.logger.debug("enter predict_enhancement") model_enhancement, session_enhancement = self.start_new_session_and_model(self.model_dir_of_enhancement) @@ -360,39 +363,41 @@ class Eynollah: index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model_enhancement.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) + label_p_pred = model_enhancement.predict( + img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + verbose=0) seg = label_p_pred[0, :, :, :] seg = seg * 255 if i == 0 and j == 0: - seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg + seg = seg[0: seg.shape[0] - margin, 0: seg.shape[1] - margin] + prediction_true[index_y_d + 0: index_y_u - margin, index_x_d + 0: index_x_u - margin, :] = seg elif i == nxf - 1 and j == nyf - 1: - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg + seg = seg[margin: seg.shape[0] - 0, margin: seg.shape[1] - 0] + prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + margin: index_x_u - 0, :] = seg elif i == 0 and j == nyf - 1: - seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg + seg = seg[margin: seg.shape[0] - 0, 0: seg.shape[1] - margin] + prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + 0: index_x_u - margin, :] = seg elif i == nxf - 1 and j == 0: - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg + seg = seg[0: seg.shape[0] - margin, margin: seg.shape[1] - 0] + prediction_true[index_y_d + 0: index_y_u - margin, index_x_d + margin: index_x_u - 0, :] = seg elif i == 0 and j != 0 and j != nyf - 1: - seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg + seg = seg[margin: seg.shape[0] - margin, 0: seg.shape[1] - margin] + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + 0: index_x_u - margin, :] = seg elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg + seg = seg[margin: seg.shape[0] - margin, margin: seg.shape[1] - 0] + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - 0, :] = seg elif i != 0 and i != nxf - 1 and j == 0: - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg + seg = seg[0: seg.shape[0] - margin, margin: seg.shape[1] - margin] + prediction_true[index_y_d + 0: 
index_y_u - margin, index_x_d + margin: index_x_u - margin, :] = seg elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg + seg = seg[margin: seg.shape[0] - 0, margin: seg.shape[1] - margin] + prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + margin: index_x_u - margin, :] = seg else: - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg + seg = seg[margin: seg.shape[0] - margin, margin: seg.shape[1] - margin] + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - margin, + :] = seg prediction_true = prediction_true.astype(int) return prediction_true @@ -472,7 +477,8 @@ class Eynollah: _, page_coord = self.early_page_for_num_of_column_classification(img) if not self.dir_in: - model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) + model_num_classifier, session_col_classifier = self.start_new_session_and_model( + self.model_dir_of_col_classifier) if self.input_binary: img_in = np.copy(img) img_in = img_in / 255.0 @@ -482,7 +488,7 @@ class Eynollah: else: img_1ch = self.imread(grayscale=True, uint8=False) width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + img_1ch = img_1ch[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] # plt.imshow(img_1ch) # plt.show() @@ -512,7 +518,7 @@ class Eynollah: return img, img_new, is_image_enhanced - def resize_and_enhance_image_with_column_classifier(self,light_version): + def resize_and_enhance_image_with_column_classifier(self, light_version): self.logger.debug("enter resize_and_enhance_image_with_column_classifier") dpi = self.dpi self.logger.info("Detected %s DPI", dpi) @@ -521,18 +527,18 @@ class Eynollah: if self.dir_in: prediction_bin = self.do_prediction(True, img, self.model_bin) else: - + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img, model_bin) - - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + prediction_bin = prediction_bin[:, :, 0] + prediction_bin = (prediction_bin[:, :] == 0) * 1 + prediction_bin = prediction_bin * 255 + + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) prediction_bin = prediction_bin.astype(np.uint8) - img= np.copy(prediction_bin) + img = np.copy(prediction_bin) img_bin = np.copy(prediction_bin) else: img = self.imread() @@ -541,8 +547,9 @@ class Eynollah: t1 = time.time() _, page_coord = self.early_page_for_num_of_column_classification(img_bin) if not self.dir_in: - model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) - + model_num_classifier, session_col_classifier = self.start_new_session_and_model( + self.model_dir_of_col_classifier) + if self.input_binary: img_in = np.copy(img) width_early = img_in.shape[1] @@ -552,7 +559,7 @@ class Eynollah: else: img_1ch = self.imread(grayscale=True) width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + img_1ch = 
img_1ch[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] img_1ch = img_1ch / 255.0 img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) @@ -561,17 +568,17 @@ class Eynollah: img_in[0, :, :, 1] = img_1ch[:, :] img_in[0, :, :, 2] = img_1ch[:, :] - if self.dir_in: label_p_pred = self.model_classifier.predict(img_in, verbose=0) else: label_p_pred = model_num_classifier.predict(img_in, verbose=0) num_col = np.argmax(label_p_pred[0]) + 1 - + self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) if dpi < DPI_THRESHOLD: - img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) + img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, + label_p_pred) if light_version: image_res = np.copy(img_new) else: @@ -645,12 +652,11 @@ class Eynollah: return model, session - def start_new_session_and_model(self, model_dir): self.logger.debug("enter start_new_session_and_model (model_dir=%s)", model_dir) - #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) - #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) - #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) + # gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) + # gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True) + # session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) physical_devices = tf.config.list_physical_devices('GPU') try: for device in physical_devices: @@ -668,10 +674,10 @@ class Eynollah: model = load_model(model_dir, compile=False) self.models[model_dir] = model except: - model = load_model(model_dir , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + model = load_model(model_dir, compile=False, + custom_objects={"PatchEncoder": PatchEncoder, "Patches": Patches}) self.models[model_dir] = model - return model, None def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): @@ -694,7 +700,6 @@ class Eynollah: prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) - else: if img.shape[0] < img_height_model: img = resize_image(img, img_height_model, img.shape[1]) @@ -739,61 +744,73 @@ class Eynollah: index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) + label_p_pred = model.predict( + img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) if i == 0 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, 0: seg_color.shape[1] - margin, :] + # seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + # mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + prediction_true[index_y_d + 0: 
index_y_u - margin, index_x_d + 0: index_x_u - margin, + :] = seg_color elif i == nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, margin: seg_color.shape[1] - 0, :] + # seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] + # mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg + prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + margin: index_x_u - 0, + :] = seg_color elif i == 0 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, 0: seg_color.shape[1] - margin, :] + # seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] + # mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg + prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + 0: index_x_u - margin, + :] = seg_color elif i == nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, margin: seg_color.shape[1] - 0, :] + # seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] + # mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + prediction_true[index_y_d + 0: index_y_u - margin, index_x_d + margin: index_x_u - 0, + :] = seg_color elif i == 0 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, 0: seg_color.shape[1] - margin, :] + # seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + # mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + 0: index_x_u - margin, + :] = seg_color elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + seg_color = seg_color[margin: 
seg_color.shape[0] - margin, margin: seg_color.shape[1] - 0, :] + # seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] + # mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - 0, + :] = seg_color elif i != 0 and i != nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, margin: seg_color.shape[1] - margin, :] + # seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] + # mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + prediction_true[index_y_d + 0: index_y_u - margin, index_x_d + margin: index_x_u - margin, + :] = seg_color elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, margin: seg_color.shape[1] - margin, :] + # seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] + # mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg + prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + margin: index_x_u - margin, + :] = seg_color else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, margin: seg_color.shape[1] - margin, + :] + # seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] + # mask_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - margin] = seg + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - margin, + :] = seg_color prediction_true = prediction_true.astype(np.uint8) - #del model - #gc.collect() + # del model + # gc.collect() return prediction_true + def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_percent=0.1): self.logger.debug("enter do_prediction") @@ -808,13 +825,11 @@ class Eynollah: label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) - seg = np.argmax(label_p_pred, axis=3)[0] seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) - else: if img.shape[0] < img_height_model: img = resize_image(img, img_height_model, img.shape[1]) @@ -859,104 +874,106 @@ class Eynollah: index_y_d = img_h - img_height_model img_patch = img[index_y_d:index_y_u, 
index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) + label_p_pred = model.predict( + img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] - - - seg_not_base = label_p_pred[0,:,:,4] - ##seg2 = -label_p_pred[0,:,:,2] - - - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 - - - - seg_test = label_p_pred[0,:,:,1] - ##seg2 = -label_p_pred[0,:,:,2] - - - seg_test[seg_test>0.75] =1 - seg_test[seg_test<1] =0 - - - seg_line = label_p_pred[0,:,:,3] - ##seg2 = -label_p_pred[0,:,:,2] - - - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 - - - seg_background = label_p_pred[0,:,:,0] - ##seg2 = -label_p_pred[0,:,:,2] - - - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 - ##seg = seg+seg2 - #seg = label_p_pred[0,:,:,2] - #seg[seg>0.4] =1 - #seg[seg<1] =0 - - ##plt.imshow(seg_test) - ##plt.show() - - ##plt.imshow(seg_background) - ##plt.show() - #seg[seg==1]=0 - #seg[seg_test==1]=1 - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 + + seg_not_base = label_p_pred[0, :, :, 4] + # seg2 = -label_p_pred[0,:,:,2] + + seg_not_base[seg_not_base > 0.03] = 1 + seg_not_base[seg_not_base < 1] = 0 + + seg_test = label_p_pred[0, :, :, 1] + # seg2 = -label_p_pred[0,:,:,2] + + seg_test[seg_test > 0.75] = 1 + seg_test[seg_test < 1] = 0 + + seg_line = label_p_pred[0, :, :, 3] + # seg2 = -label_p_pred[0,:,:,2] + + seg_line[seg_line > 0.1] = 1 + seg_line[seg_line < 1] = 0 + + seg_background = label_p_pred[0, :, :, 0] + # seg2 = -label_p_pred[0,:,:,2] + + seg_background[seg_background > 0.25] = 1 + seg_background[seg_background < 1] = 0 + # seg = seg+seg2 + # seg = label_p_pred[0,:,:,2] + # seg[seg>0.4] =1 + # seg[seg<1] =0 + + # plt.imshow(seg_test) + # plt.show() + + # plt.imshow(seg_background) + # plt.show() + # seg[seg==1]=0 + # seg[seg_test==1]=1 + seg[seg_not_base == 1] = 4 + seg[seg_background == 1] = 0 + seg[(seg_line == 1) & (seg == 0)] = 3 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) if i == 0 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, 0: seg_color.shape[1] - margin, :] + seg = seg[0: seg.shape[0] - margin, 0: seg.shape[1] - margin] + mask_true[index_y_d + 0: index_y_u - margin, index_x_d + 0: index_x_u - margin] = seg + prediction_true[index_y_d + 0: index_y_u - margin, index_x_d + 0: index_x_u - margin, + :] = seg_color elif i == nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, margin: seg_color.shape[1] - 0, :] + seg = seg[margin: seg.shape[0] - 0, margin: seg.shape[1] - 0] + mask_true[index_y_d + margin: index_y_u - 0, index_x_d + margin: index_x_u - 0] = seg + 
prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + margin: index_x_u - 0, + :] = seg_color elif i == 0 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, 0: seg_color.shape[1] - margin, :] + seg = seg[margin: seg.shape[0] - 0, 0: seg.shape[1] - margin] + mask_true[index_y_d + margin: index_y_u - 0, index_x_d + 0: index_x_u - margin] = seg + prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + 0: index_x_u - margin, + :] = seg_color elif i == nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + seg_color = seg_color[0: seg_color.shape[0] - margin, margin: seg_color.shape[1] - 0, :] + seg = seg[0: seg.shape[0] - margin, margin: seg.shape[1] - 0] + mask_true[index_y_d + 0: index_y_u - margin, index_x_d + margin: index_x_u - 0] = seg + prediction_true[index_y_d + 0: index_y_u - margin, index_x_d + margin: index_x_u - 0, + :] = seg_color elif i == 0 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, 0: seg_color.shape[1] - margin, :] + seg = seg[margin: seg.shape[0] - margin, 0: seg.shape[1] - margin] + mask_true[index_y_d + margin: index_y_u - margin, index_x_d + 0: index_x_u - margin] = seg + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + 0: index_x_u - margin, + :] = seg_color elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, margin: seg_color.shape[1] - 0, :] + seg = seg[margin: seg.shape[0] - margin, margin: seg.shape[1] - 0] + mask_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - 0] = seg + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - 0, + :] = seg_color elif i != 0 and i != nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color 
+ seg_color = seg_color[0: seg_color.shape[0] - margin, margin: seg_color.shape[1] - margin, :] + seg = seg[0: seg.shape[0] - margin, margin: seg.shape[1] - margin] + mask_true[index_y_d + 0: index_y_u - margin, index_x_d + margin: index_x_u - margin] = seg + prediction_true[index_y_d + 0: index_y_u - margin, index_x_d + margin: index_x_u - margin, + :] = seg_color elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - 0, margin: seg_color.shape[1] - margin, :] + seg = seg[margin: seg.shape[0] - 0, margin: seg.shape[1] - margin] + mask_true[index_y_d + margin: index_y_u - 0, index_x_d + margin: index_x_u - margin] = seg + prediction_true[index_y_d + margin: index_y_u - 0, index_x_d + margin: index_x_u - margin, + :] = seg_color else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + seg_color = seg_color[margin: seg_color.shape[0] - margin, margin: seg_color.shape[1] - margin, + :] + seg = seg[margin: seg.shape[0] - margin, margin: seg.shape[1] - margin] + mask_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - margin] = seg + prediction_true[index_y_d + margin: index_y_u - margin, index_x_d + margin: index_x_u - margin, + :] = seg_color prediction_true = prediction_true.astype(np.uint8) return prediction_true @@ -966,10 +983,10 @@ class Eynollah: cont_page = [] if not self.ignore_page_extraction: img = cv2.GaussianBlur(self.image, (5, 5), 0) - + if not self.dir_in: model_page, session_page = self.start_new_session_and_model(self.model_page_dir) - + if not self.dir_in: img_page_prediction = self.do_prediction(False, img, model_page) else: @@ -978,8 +995,8 @@ class Eynollah: _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - - if len(contours)>0: + + if len(contours) > 0: cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] x, y, w, h = cv2.boundingRect(cnt) @@ -998,27 +1015,31 @@ class Eynollah: else: box = [0, 0, img.shape[1], img.shape[0]] croped_page, page_coord = crop_image_inside_box(box, self.image) - cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) - + cont_page.append(np.array( + [[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], + [page_coord[2], page_coord[1]]])) + self.logger.debug("exit extract_page") else: box = [0, 0, self.image.shape[1], self.image.shape[0]] croped_page, page_coord = crop_image_inside_box(box, self.image) - cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], 
page_coord[1]]])) + cont_page.append(np.array( + [[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], + [page_coord[2], page_coord[1]]])) return croped_page, page_coord, cont_page - def early_page_for_num_of_column_classification(self,img_bin): + def early_page_for_num_of_column_classification(self, img_bin): if not self.ignore_page_extraction: self.logger.debug("enter early_page_for_num_of_column_classification") if self.input_binary: - img =np.copy(img_bin) + img = np.copy(img_bin) img = img.astype(np.uint8) else: img = self.imread() if not self.dir_in: model_page, session_page = self.start_new_session_and_model(self.model_page_dir) img = cv2.GaussianBlur(img, (5, 5), 0) - + if self.dir_in: img_page_prediction = self.do_prediction(False, img, self.model_page) else: @@ -1028,7 +1049,7 @@ class Eynollah: _, thresh = cv2.threshold(imgray, 0, 255, 0) thresh = cv2.dilate(thresh, KERNEL, iterations=3) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if len(contours)>0: + if len(contours) > 0: cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt = contours[np.argmax(cnt_size)] x, y, w, h = cv2.boundingRect(cnt) @@ -1036,7 +1057,7 @@ class Eynollah: else: box = [0, 0, img.shape[1], img.shape[0]] croped_page, page_coord = crop_image_inside_box(box, img) - + self.logger.debug("exit early_page_for_num_of_column_classification") else: img = self.imread() @@ -1049,7 +1070,8 @@ class Eynollah: img_height_h = img.shape[0] img_width_h = img.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) + model_region, session_region = self.start_new_session_and_model( + self.model_region_dir_fully if patches else self.model_region_dir_fully_np) else: model_region = self.model_region_fl if patches else self.model_region_fl_np @@ -1108,39 +1130,40 @@ class Eynollah: if (self.scale_x == 1 and img_width_h > 4000) or (self.scale_x != 1 and img_width_h > 3700): img = otsu_copy_binary(img) img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 3700 / float(img_width_h)), 3700) + img = resize_image(img, int(img_height_h * 3700 / float(img_width_h)), 3700) else: img = otsu_copy_binary(img) img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) if cols == 5: if self.scale_x == 1 and img_width_h > 5000: img = otsu_copy_binary(img) img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)) + img = resize_image(img, int(img_height_h * 0.7), int(img_width_h * 0.7)) else: img = otsu_copy_binary(img) img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9) ) + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) if cols >= 6: if img_width_h > 5600: img = otsu_copy_binary(img) img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 5600 / float(img_width_h)), 5600) + img = resize_image(img, int(img_height_h * 5600 / float(img_width_h)), 5600) else: img = otsu_copy_binary(img) img = img.astype(np.uint8) - img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) + img = resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) marginal_of_patch_percent = 0.1 prediction_regions = self.do_prediction(patches, img, model_region, 
marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 - - def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, + slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") num_cores = cpu_count() queue_of_all_params = Queue() @@ -1149,12 +1172,15 @@ class Eynollah: nh = np.linspace(0, len(boxes), num_cores + 1) indexes_by_text_con = np.array(range(len(contours_par))) for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=self.do_work_of_slopes_new_light, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew))) + boxes_per_process = boxes[int(nh[i]): int(nh[i + 1])] + contours_per_process = contours[int(nh[i]): int(nh[i + 1])] + contours_par_per_process = contours_par[int(nh[i]): int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]): int(nh[i + 1])] + + processes.append(Process(target=self.do_work_of_slopes_new_light, args=( + queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, + contours_par_per_process, + indexes_text_con_per_process, image_page_rotated, slope_deskew))) for i in range(num_cores): processes[i].start() @@ -1188,7 +1214,8 @@ class Eynollah: self.logger.debug("exit get_slopes_and_deskew_new") return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con - def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + def get_slopes_and_deskew_new(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, + slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") num_cores = cpu_count() queue_of_all_params = Queue() @@ -1197,12 +1224,15 @@ class Eynollah: nh = np.linspace(0, len(boxes), num_cores + 1) indexes_by_text_con = np.array(range(len(contours_par))) for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] - - processes.append(Process(target=self.do_work_of_slopes_new, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, indexes_text_con_per_process, image_page_rotated, slope_deskew))) + boxes_per_process = boxes[int(nh[i]): int(nh[i + 1])] + contours_per_process = contours[int(nh[i]): int(nh[i + 1])] + contours_par_per_process = contours_par[int(nh[i]): int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]): int(nh[i + 1])] + + processes.append(Process(target=self.do_work_of_slopes_new, args=( + queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, + 
contours_par_per_process, + indexes_text_con_per_process, image_page_rotated, slope_deskew))) for i in range(num_cores): processes[i].start() @@ -1236,7 +1266,8 @@ class Eynollah: self.logger.debug("exit get_slopes_and_deskew_new") return slopes, all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con - def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, mask_texts_only, num_col, scale_par, slope_deskew): + def get_slopes_and_deskew_new_curved(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, + mask_texts_only, num_col, scale_par, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new_curved") num_cores = cpu_count() queue_of_all_params = Queue() @@ -1246,12 +1277,15 @@ class Eynollah: indexes_by_text_con = np.array(range(len(contours_par))) for i in range(num_cores): - boxes_per_process = boxes[int(nh[i]) : int(nh[i + 1])] - contours_per_process = contours[int(nh[i]) : int(nh[i + 1])] - contours_par_per_process = contours_par[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] + boxes_per_process = boxes[int(nh[i]): int(nh[i + 1])] + contours_per_process = contours[int(nh[i]): int(nh[i + 1])] + contours_par_per_process = contours_par[int(nh[i]): int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]): int(nh[i + 1])] - processes.append(Process(target=self.do_work_of_slopes_new_curved, args=(queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_text_con_per_process, slope_deskew))) + processes.append(Process(target=self.do_work_of_slopes_new_curved, args=( + queue_of_all_params, boxes_per_process, textline_mask_tot, contours_per_process, + contours_par_per_process, + image_page_rotated, mask_texts_only, num_col, scale_par, indexes_text_con_per_process, slope_deskew))) for i in range(num_cores): processes[i].start() @@ -1287,7 +1321,9 @@ class Eynollah: # print(slopes,'slopes') return all_found_textline_polygons, boxes, all_found_text_regions, all_found_text_regions_par, all_box_coord, all_index_text_con, slopes - def do_work_of_slopes_new_curved(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, indexes_r_con_per_pro, slope_deskew): + def do_work_of_slopes_new_curved(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, + contours_par_per_process, image_page_rotated, mask_texts_only, num_col, scale_par, + indexes_r_con_per_pro, slope_deskew): self.logger.debug("enter do_work_of_slopes_new_curved") slopes_per_each_subprocess = [] bounding_box_of_textregion_per_each_subprocess = [] @@ -1301,7 +1337,8 @@ class Eynollah: for mv in range(len(boxes_text)): - all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] + all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1]: boxes_text[mv][1] + boxes_text[mv][3], + boxes_text[mv][0]: boxes_text[mv][0] + boxes_text[mv][2]] all_text_region_raw = all_text_region_raw.astype(np.uint8) img_int_p = all_text_region_raw[:, :] @@ -1315,7 +1352,8 @@ class Eynollah: else: try: textline_con, hierarchy = return_contours_of_image(img_int_p) - 
textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, + min_area=0.0008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if self.isNaN(y_diff_mean): slope_for_all = MAX_SLOPE @@ -1345,19 +1383,20 @@ class Eynollah: x, y, w, h = cv2.boundingRect(cnt_o_t_max) mask_biggest = np.zeros(mask_texts_only.shape) mask_biggest = cv2.fillPoly(mask_biggest, pts=[cnt_o_t_max], color=(1, 1, 1)) - mask_region_in_patch_region = mask_biggest[y : y + h, x : x + w] + mask_region_in_patch_region = mask_biggest[y: y + h, x: x + w] textline_biggest_region = mask_biggest * textline_mask_tot_ea # print(slope_for_all,'slope_for_all') - textline_rotated_separated = separate_lines_new2(textline_biggest_region[y : y + h, x : x + w], 0, num_col, slope_for_all, plotter=self.plotter) + textline_rotated_separated = separate_lines_new2(textline_biggest_region[y: y + h, x: x + w], 0, + num_col, slope_for_all, plotter=self.plotter) # new line added - ##print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) + # print(np.shape(textline_rotated_separated),np.shape(mask_biggest)) textline_rotated_separated[mask_region_in_patch_region[:, :] != 1] = 0 # till here - textline_cnt_separated[y : y + h, x : x + w] = textline_rotated_separated - textline_region_in_image[y : y + h, x : x + w] = textline_rotated_separated + textline_cnt_separated[y: y + h, x: x + w] = textline_rotated_separated + textline_region_in_image[y: y + h, x: x + w] = textline_rotated_separated # plt.imshow(textline_region_in_image) # plt.show() @@ -1377,7 +1416,8 @@ class Eynollah: mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4) pixel_img = 1 - mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) + mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), + int(mask_biggest2.shape[1] * scale_par)) cnt_textlines_in_image_ind = return_contours_of_interested_textline(mask_biggest2, pixel_img) try: textlines_cnt_per_region.append(cnt_textlines_in_image_ind[0]) @@ -1385,7 +1425,10 @@ class Eynollah: self.logger.error(why) else: add_boxes_coor_into_textlines = True - textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], add_boxes_coor_into_textlines) + textlines_cnt_per_region = textline_contours_postprocessing(all_text_region_raw, slope_for_all, + contours_par_per_process[mv], + boxes_text[mv], + add_boxes_coor_into_textlines) add_boxes_coor_into_textlines = False # print(np.shape(textlines_cnt_per_region),'textlines_cnt_per_region') @@ -1395,8 +1438,13 @@ class Eynollah: contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) all_box_coord_per_process.append(crop_coor) - queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess]) - def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): + queue_of_all_params.put( + [textlines_rectangles_per_each_subprocess, 
bounding_box_of_textregion_per_each_subprocess, + contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, + all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess]) + + def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, + contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): self.logger.debug('enter do_work_of_slopes_new_light') slopes_per_each_subprocess = [] bounding_box_of_textregion_per_each_subprocess = [] @@ -1406,29 +1454,37 @@ class Eynollah: all_box_coord_per_process = [] index_by_text_region_contours = [] for mv in range(len(boxes_text)): - _, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated) - mask_textline = np.zeros((textline_mask_tot_ea.shape)) - mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) - all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] - all_text_region_raw=all_text_region_raw.astype(np.uint8) + _, crop_coor = crop_image_inside_box(boxes_text[mv], image_page_rotated) + mask_textline = np.zeros(textline_mask_tot_ea.shape) + mask_textline = cv2.fillPoly(mask_textline, pts=[contours_per_process[mv]], color=(1, 1, 1)) + all_text_region_raw = (textline_mask_tot_ea * mask_textline[:, :])[ + boxes_text[mv][1]:boxes_text[mv][1] + boxes_text[mv][3], + boxes_text[mv][0]:boxes_text[mv][0] + boxes_text[mv][2]] + all_text_region_raw = all_text_region_raw.astype(np.uint8) slopes_per_each_subprocess.append([slope_deskew][0]) mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) - mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) + mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], + color=(1, 1, 1)) # plt.imshow(mask_only_con_region) # plt.show() - + if self.textline_light: all_text_region_raw = np.copy(textline_mask_tot_ea) all_text_region_raw[mask_only_con_region == 0] = 0 cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(all_text_region_raw) - cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, + hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) else: - all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) - mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] + all_text_region_raw = np.copy( + textline_mask_tot_ea[boxes_text[mv][1]: boxes_text[mv][1] + boxes_text[mv][3], + boxes_text[mv][0]: boxes_text[mv][0] + boxes_text[mv][2]]) + mask_only_con_region = mask_only_con_region[boxes_text[mv][1]: boxes_text[mv][1] + boxes_text[mv][3], + boxes_text[mv][0]: boxes_text[mv][0] + boxes_text[mv][2]] all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], contours_par_per_process[mv], boxes_text[mv]) + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, [slope_deskew][0], + contours_par_per_process[mv], boxes_text[mv]) 
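# --- Illustrative aside (editorial, not part of the patch): the hunks above
# and below reformat a recurring mask-then-crop idiom used throughout the
# do_work_of_slopes_* workers: rasterize one region contour into a binary
# mask, suppress textline pixels outside it, then crop to the region's
# bounding box. A minimal, self-contained sketch of that idiom follows; the
# names `textline_mask`, `region_contour`, and `box` are hypothetical
# stand-ins for the per-process variables (`textline_mask_tot_ea`,
# `contours_par_per_process[mv]`, `boxes_text[mv]`).
import numpy as np
import cv2

def crop_textlines_to_region(textline_mask, region_contour, box):
    # Rasterize the region contour into a binary mask over the full page.
    region_mask = np.zeros(textline_mask.shape, dtype=np.uint8)
    cv2.fillPoly(region_mask, pts=[region_contour], color=1)
    # Keep only textline pixels inside the region, then crop to the
    # region's bounding box (x, y, w, h), as the loop above does per box.
    x, y, w, h = box
    return (textline_mask * region_mask)[y:y + h, x:x + w].astype(np.uint8)
# --- end aside ---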
textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) @@ -1437,9 +1493,13 @@ class Eynollah: contours_textregion_per_each_subprocess.append(contours_per_process[mv]) contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) all_box_coord_per_process.append(crop_coor) - queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) - - def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): + queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, + bounding_box_of_textregion_per_each_subprocess, + contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, + all_box_coord_per_process, index_by_text_region_contours]) + + def do_work_of_slopes_new(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, + contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew): self.logger.debug('enter do_work_of_slopes_new') slopes_per_each_subprocess = [] bounding_box_of_textregion_per_each_subprocess = [] @@ -1449,26 +1509,31 @@ class Eynollah: all_box_coord_per_process = [] index_by_text_region_contours = [] for mv in range(len(boxes_text)): - _, crop_coor = crop_image_inside_box(boxes_text[mv],image_page_rotated) - mask_textline = np.zeros((textline_mask_tot_ea.shape)) - mask_textline = cv2.fillPoly(mask_textline,pts=[contours_per_process[mv]],color=(1,1,1)) - all_text_region_raw = (textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] - all_text_region_raw=all_text_region_raw.astype(np.uint8) - img_int_p=all_text_region_raw[:,:]#self.all_text_region_raw[mv] - img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2) - - if img_int_p.shape[0]/img_int_p.shape[1]<0.1: + _, crop_coor = crop_image_inside_box(boxes_text[mv], image_page_rotated) + mask_textline = np.zeros(textline_mask_tot_ea.shape) + mask_textline = cv2.fillPoly(mask_textline, pts=[contours_per_process[mv]], color=(1, 1, 1)) + all_text_region_raw = (textline_mask_tot_ea * mask_textline[:, :])[ + boxes_text[mv][1]:boxes_text[mv][1] + boxes_text[mv][3], + boxes_text[mv][0]:boxes_text[mv][0] + boxes_text[mv][2]] + all_text_region_raw = all_text_region_raw.astype(np.uint8) + img_int_p = all_text_region_raw[:, :] # self.all_text_region_raw[mv] + img_int_p = cv2.erode(img_int_p, KERNEL, iterations=2) + + if img_int_p.shape[0] / img_int_p.shape[1] < 0.1: slopes_per_each_subprocess.append(0) slope_for_all = [slope_deskew][0] - all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv], 0) + all_text_region_raw = textline_mask_tot_ea[boxes_text[mv][1]: boxes_text[mv][1] + boxes_text[mv][3], + boxes_text[mv][0]: boxes_text[mv][0] + boxes_text[mv][2]] + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, + 
contours_par_per_process[mv], boxes_text[mv], 0) textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) bounding_box_of_textregion_per_each_subprocess.append(boxes_text[mv]) else: try: textline_con, hierarchy = return_contours_of_image(img_int_p) - textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, min_area=0.00008) + textline_con_fil = filter_contours_area_of_image(img_int_p, textline_con, hierarchy, max_area=1, + min_area=0.00008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) if self.isNaN(y_diff_mean): slope_for_all = MAX_SLOPE @@ -1487,22 +1552,27 @@ class Eynollah: slope_for_all = [slope_deskew][0] slopes_per_each_subprocess.append(slope_for_all) mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) - mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) + mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], + color=(1, 1, 1)) # plt.imshow(mask_only_con_region) # plt.show() - all_text_region_raw = np.copy(textline_mask_tot_ea[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]]) - mask_only_con_region = mask_only_con_region[boxes_text[mv][1] : boxes_text[mv][1] + boxes_text[mv][3], boxes_text[mv][0] : boxes_text[mv][0] + boxes_text[mv][2]] + all_text_region_raw = np.copy( + textline_mask_tot_ea[boxes_text[mv][1]: boxes_text[mv][1] + boxes_text[mv][3], + boxes_text[mv][0]: boxes_text[mv][0] + boxes_text[mv][2]]) + mask_only_con_region = mask_only_con_region[boxes_text[mv][1]: boxes_text[mv][1] + boxes_text[mv][3], + boxes_text[mv][0]: boxes_text[mv][0] + boxes_text[mv][2]] - ##plt.imshow(textline_mask_tot_ea) - ##plt.show() - ##plt.imshow(all_text_region_raw) - ##plt.show() - ##plt.imshow(mask_only_con_region) - ##plt.show() + # plt.imshow(textline_mask_tot_ea) + # plt.show() + # plt.imshow(all_text_region_raw) + # plt.show() + # plt.imshow(mask_only_con_region) + # plt.show() all_text_region_raw[mask_only_con_region == 0] = 0 - cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, contours_par_per_process[mv], boxes_text[mv]) + cnt_clean_rot = textline_contours_postprocessing(all_text_region_raw, slope_for_all, + contours_par_per_process[mv], boxes_text[mv]) textlines_rectangles_per_each_subprocess.append(cnt_clean_rot) index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) @@ -1511,12 +1581,16 @@ class Eynollah: contours_textregion_per_each_subprocess.append(contours_per_process[mv]) contours_textregion_par_per_each_subprocess.append(contours_par_per_process[mv]) all_box_coord_per_process.append(crop_coor) - queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) + queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, + bounding_box_of_textregion_per_each_subprocess, + contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, + all_box_coord_per_process, index_by_text_region_contours]) def textline_contours(self, img, patches, scaler_h, scaler_w): self.logger.debug('enter textline_contours') if not self.dir_in: - model_textline, session_textline = 
self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) + model_textline, session_textline = self.start_new_session_and_model( + self.model_textline_dir if patches else self.model_textline_dir_np) img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] @@ -1532,14 +1606,12 @@ class Eynollah: else: prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) - if self.textline_light: - return (prediction_textline[:, :, 0]==1)*1, (prediction_textline_longshot_true_size[:, :, 0]==1)*1 + return (prediction_textline[:, :, 0] == 1) * 1, (prediction_textline_longshot_true_size[:, :, 0] == 1) * 1 else: return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] - def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): self.logger.debug('enter do_work_of_slopes') slope_biggest = 0 @@ -1547,12 +1619,14 @@ class Eynollah: boxes_sub_new = [] poly_sub = [] for mv in range(len(boxes_per_process)): - crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) + crop_img, _ = crop_image_inside_box(boxes_per_process[mv], + np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) crop_img = crop_img[:, :, 0] crop_img = cv2.erode(crop_img, KERNEL, iterations=2) try: textline_con, hierarchy = return_contours_of_image(crop_img) - textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, max_area=1, min_area=0.0008) + textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierarchy, max_area=1, + min_area=0.0008) y_diff_mean = find_contours_mean_y_diff(textline_con_fil) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) crop_img[crop_img > 0] = 1 @@ -1565,7 +1639,8 @@ class Eynollah: slope_corresponding_textregion = slope_biggest slopes_sub.append(slope_corresponding_textregion) - cnt_clean_rot = textline_contours_postprocessing(crop_img, slope_corresponding_textregion, contours_per_process[mv], boxes_per_process[mv]) + cnt_clean_rot = textline_contours_postprocessing(crop_img, slope_corresponding_textregion, + contours_per_process[mv], boxes_per_process[mv]) poly_sub.append(cnt_clean_rot) boxes_sub_new.append(boxes_per_process[mv]) @@ -1573,29 +1648,28 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) - def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): + + def get_regions_light_v(self, img, is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_light_v") erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) + # model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) - - if num_col_classifier == 1: img_w_new = 1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - + elif num_col_classifier == 2: img_w_new = 1500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - + elif num_col_classifier == 3: img_w_new = 2000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - + elif num_col_classifier == 4: img_w_new = 2500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) @@ -1605,25 +1679,23 @@ class Eynollah: else: 
img_w_new = 4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) - img_resized = resize_image(img,img_h_new, img_w_new ) - + img_resized = resize_image(img, img_h_new, img_w_new) + if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin) else: prediction_bin = self.do_prediction(True, img_resized, self.model_bin) - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - + prediction_bin = prediction_bin[:, :, 0] + prediction_bin = (prediction_bin[:, :] == 0) * 1 + prediction_bin = prediction_bin * 255 + + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + prediction_bin = prediction_bin.astype(np.uint16) - #img= np.copy(prediction_bin) + # img = np.copy(prediction_bin) img_bin = np.copy(prediction_bin) - - - + textline_mask_tot_ea = self.run_textline(img_bin) if not self.dir_in: @@ -1631,121 +1703,117 @@ class Eynollah: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - - #plt.imshow(prediction_regions_org[:,:,0]) - #plt.show() - - prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - - prediction_regions_org=prediction_regions_org[:,:,0] - - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - - mask_images_only=(prediction_regions_org[:,:] ==2)*1 - + + # plt.imshow(prediction_regions_org[:,:,0]) + # plt.show() + + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_height_h, img_width_h) + + prediction_regions_org = prediction_regions_org[:, :, 0] + + mask_lines_only = (prediction_regions_org[:, :] == 3) * 1 + + mask_texts_only = (prediction_regions_org[:, :] == 1) * 1 + + mask_images_only = (prediction_regions_org[:, :] == 2) * 1 + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - - - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - + polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, + hir_lines_xml, max_area=1, + min_area=0.00001) + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) + + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) + text_regions_p_true = np.zeros(prediction_regions_org.shape) - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) - - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3)) + + text_regions_p_true[:, :][mask_images_only[:, :] == 
1] = 2 + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1)) + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea - def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): + def get_regions_from_xy_2models(self, img, is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - + if not self.dir_in: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) - ratio_y=1.3 - ratio_x=1 + ratio_y = 1.3 + ratio_x = 1 - img = resize_image(img_org, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) + img = resize_image(img_org, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x)) if not self.dir_in: prediction_regions_org_y = self.do_prediction(True, img, model_region) else: prediction_regions_org_y = self.do_prediction(True, img, self.model_region) - prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h ) - - #plt.imshow(prediction_regions_org_y[:,:,0]) - #plt.show() - prediction_regions_org_y = prediction_regions_org_y[:,:,0] - mask_zeros_y = (prediction_regions_org_y[:,:]==0)*1 - - ##img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1 - img_only_regions_with_sep = ( prediction_regions_org_y[:,:] == 1 )*1 + prediction_regions_org_y = resize_image(prediction_regions_org_y, img_height_h, img_width_h) + + # plt.imshow(prediction_regions_org_y[:,:,0]) + # plt.show() + prediction_regions_org_y = prediction_regions_org_y[:, :, 0] + mask_zeros_y = (prediction_regions_org_y[:, :] == 0) * 1 + + # img_only_regions_with_sep = ( (prediction_regions_org_y[:,:] != 3) & (prediction_regions_org_y[:,:] != 0) )*1 + img_only_regions_with_sep = (prediction_regions_org_y[:, :] == 1) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - + try: - img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=20) + img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=20) _, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) - - img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1]*(1.2 if is_image_enhanced else 1))) - + + img = resize_image(img_org, int(img_org.shape[0]), + int(img_org.shape[1] * (1.2 if is_image_enhanced else 1))) + if self.dir_in: prediction_regions_org = self.do_prediction(True, img, self.model_region) else: prediction_regions_org = self.do_prediction(True, img, model_region) - prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) + + prediction_regions_org = prediction_regions_org[:, :, 0] + prediction_regions_org[(prediction_regions_org[:, :] == 1) & (mask_zeros_y[:, :] == 1)] = 0 - prediction_regions_org=prediction_regions_org[:,:,0] - prediction_regions_org[(prediction_regions_org[:,:]==1) & (mask_zeros_y[:,:]==1)]=0 - - if not self.dir_in: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p2) img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) - + if self.dir_in: prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, 0.2) else: prediction_regions_org2 = 
self.do_prediction(True, img, model_region, 0.2) - prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) - + prediction_regions_org2 = resize_image(prediction_regions_org2, img_height_h, img_width_h) - mask_zeros2 = (prediction_regions_org2[:,:,0] == 0) - mask_lines2 = (prediction_regions_org2[:,:,0] == 3) - text_sume_early = (prediction_regions_org[:,:] == 1).sum() + mask_zeros2 = (prediction_regions_org2[:, :, 0] == 0) + mask_lines2 = (prediction_regions_org2[:, :, 0] == 3) + text_sume_early = (prediction_regions_org[:, :] == 1).sum() prediction_regions_org_copy = np.copy(prediction_regions_org) - prediction_regions_org_copy[(prediction_regions_org_copy[:,:]==1) & (mask_zeros2[:,:]==1)] = 0 - text_sume_second = ((prediction_regions_org_copy[:,:]==1)*1).sum() + prediction_regions_org_copy[(prediction_regions_org_copy[:, :] == 1) & (mask_zeros2[:, :] == 1)] = 0 + text_sume_second = ((prediction_regions_org_copy[:, :] == 1) * 1).sum() rate_two_models = text_sume_second / float(text_sume_early) * 100 self.logger.info("ratio_of_two_models: %s", rate_two_models) - if not(is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): + if not (is_image_enhanced and rate_two_models < RATIO_OF_TWO_MODEL_THRESHOLD): prediction_regions_org = np.copy(prediction_regions_org_copy) - - - prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 - mask_lines_only=(prediction_regions_org[:,:]==3)*1 - prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2) + prediction_regions_org[(mask_lines2[:, :] == 1) & (prediction_regions_org[:, :] == 0)] = 3 + mask_lines_only = (prediction_regions_org[:, :] == 3) * 1 + prediction_regions_org = cv2.erode(prediction_regions_org[:, :], KERNEL, iterations=2) + prediction_regions_org = cv2.dilate(prediction_regions_org[:, :], KERNEL, iterations=2) - prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2) - - - if rate_two_models<=40: + if rate_two_models <= 40: if self.input_binary: prediction_bin = np.copy(img_org) else: @@ -1754,141 +1822,142 @@ class Eynollah: prediction_bin = self.do_prediction(True, img_org, model_bin) else: prediction_bin = self.do_prediction(True, img_org, self.model_bin) - prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) - - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - + prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h) + + prediction_bin = prediction_bin[:, :, 0] + prediction_bin = (prediction_bin[:, :] == 0) * 1 + prediction_bin = prediction_bin * 255 + + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + if not self.dir_in: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) - ratio_y=1 - ratio_x=1 + ratio_y = 1 + ratio_x = 1 + img = resize_image(prediction_bin, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x)) - img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) - if not self.dir_in: prediction_regions_org = self.do_prediction(True, img, model_region) else: prediction_regions_org = self.do_prediction(True, img, self.model_region) - prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - prediction_regions_org=prediction_regions_org[:,:,0] - - 
mask_lines_only=(prediction_regions_org[:,:]==3)*1 - - mask_texts_only=(prediction_regions_org[:,:]==1)*1 - mask_images_only=(prediction_regions_org[:,:]==2)*1 - - - + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) + prediction_regions_org = prediction_regions_org[:, :, 0] + + mask_lines_only = (prediction_regions_org[:, :] == 3) * 1 + + mask_texts_only = (prediction_regions_org[:, :] == 1) * 1 + mask_images_only = (prediction_regions_org[:, :] == 2) * 1 + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, + hir_lines_xml, max_area=1, + min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) - text_regions_p_true = cv2.fillPoly(text_regions_p_true,pts = polygons_of_only_lines, color=(3, 3, 3)) - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3)) + text_regions_p_true[:, :][mask_images_only[:, :] == 1] = 2 - text_regions_p_true=cv2.fillPoly(text_regions_p_true,pts=polygons_of_only_texts, color=(1,1,1)) + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1)) return text_regions_p_true, erosion_hurts, polygons_lines_xml except: - + if self.input_binary: prediction_bin = np.copy(img_org) - + if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_org, model_bin) else: prediction_bin = self.do_prediction(True, img_org, self.model_bin) - prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) - prediction_bin=prediction_bin[:,:,0] - - prediction_bin = (prediction_bin[:,:]==0)*1 - - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - + prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h) + prediction_bin = prediction_bin[:, :, 0] + + prediction_bin = (prediction_bin[:, :] == 0) * 1 + + prediction_bin = prediction_bin * 255 + + prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + if not self.dir_in: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) - + else: prediction_bin = np.copy(img_org) - ratio_y=1 - ratio_x=1 - + ratio_y = 1 + ratio_x = 1 - img = resize_image(prediction_bin, int(img_org.shape[0]*ratio_y), int(img_org.shape[1]*ratio_x)) + img = resize_image(prediction_bin, int(img_org.shape[0] * ratio_y), int(img_org.shape[1] * ratio_x)) if not self.dir_in: prediction_regions_org = self.do_prediction(True, img, model_region) else: prediction_regions_org = self.do_prediction(True, img, self.model_region) - prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - prediction_regions_org=prediction_regions_org[:,:,0] - - #mask_lines_only=(prediction_regions_org[:,:]==3)*1 - #img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) - - #prediction_regions_org = self.do_prediction(True, img, 
model_region) - - #prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) - - #prediction_regions_org = prediction_regions_org[:,:,0] - - #prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 - - - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - - mask_images_only=(prediction_regions_org[:,:] ==2)*1 - + prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h) + prediction_regions_org = prediction_regions_org[:, :, 0] + + # mask_lines_only = (prediction_regions_org[:,:] == 3) * 1 + # img = resize_image(img_org, int(img_org.shape[0]*1), int(img_org.shape[1]*1)) + + # prediction_regions_org = self.do_prediction(True, img, model_region) + + # prediction_regions_org = resize_image(prediction_regions_org, img_height_h, img_width_h ) + + # prediction_regions_org = prediction_regions_org[:,:,0] + + # prediction_regions_org[(prediction_regions_org[:,:] == 1) & (mask_zeros_y[:,:] == 1)]=0 + + mask_lines_only = (prediction_regions_org[:, :] == 3) * 1 + + mask_texts_only = (prediction_regions_org[:, :] == 1) * 1 + + mask_images_only = (prediction_regions_org[:, :] == 2) * 1 + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - - - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - - + polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, + hir_lines_xml, max_area=1, + min_area=0.00001) + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) + + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) + text_regions_p_true = np.zeros(prediction_regions_org.shape) - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) - - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_lines, color=(3, 3, 3)) + + text_regions_p_true[:, :][mask_images_only[:, :] == 1] = 2 + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts=polygons_of_only_texts, color=(1, 1, 1)) + erosion_hurts = True return text_regions_p_true, erosion_hurts, polygons_lines_xml - def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + def do_order_of_regions_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, + textline_mask_tot): self.logger.debug("enter do_order_of_regions_full_layout") - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours(contours_only_text_parent) - cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours(contours_only_text_parent_h) + cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( + contours_only_text_parent) + cx_text_only_h, cy_text_only_h, x_min_text_only_h, _, _, _, y_cor_x_min_main_h = find_new_features_of_contours( + 
contours_only_text_parent_h) try: arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: + if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and \ + y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: arg_text_con.append(jj) break args_contours = np.array(range(len(arg_text_con))) arg_text_con_h = [] for ii in range(len(cx_text_only_h)): for jj in range(len(boxes)): - if (x_min_text_only_h[ii] + 80) >= boxes[jj][0] and (x_min_text_only_h[ii] + 80) < boxes[jj][1] and y_cor_x_min_main_h[ii] >= boxes[jj][2] and y_cor_x_min_main_h[ii] < boxes[jj][3]: + if (x_min_text_only_h[ii] + 80) >= boxes[jj][0] and (x_min_text_only_h[ii] + 80) < boxes[jj][1] and \ + y_cor_x_min_main_h[ii] >= boxes[jj][2] and y_cor_x_min_main_h[ii] < boxes[jj][3]: arg_text_con_h.append(jj) break args_contours_h = np.array(range(len(arg_text_con_h))) @@ -1912,9 +1981,13 @@ class Eynollah: for box in args_contours_box_h: con_inter_box_h.append(contours_only_text_parent_h[box]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[int(boxes[iij][2]): int(boxes[iij][3]), int(boxes[iij][0]): int(boxes[iij][1])], + con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, + indexes_sorted, index_by_kind_sorted, + kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] @@ -1923,11 +1996,13 @@ class Eynollah: for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji in range(len(id_of_texts)): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -1950,19 +2025,23 @@ class Eynollah: arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): - if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= \ + 
boxes[jj][2] and cy_text_only[ii] < boxes[jj][ + 3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) break args_contours = np.array(range(len(arg_text_con))) order_by_con_main = np.zeros(len(arg_text_con)) - ############################# head + # ############################ head arg_text_con_h = [] for ii in range(len(cx_text_only_h)): for jj in range(len(boxes)): - if cx_text_only_h[ii] >= boxes[jj][0] and cx_text_only_h[ii] < boxes[jj][1] and cy_text_only_h[ii] >= boxes[jj][2] and cy_text_only_h[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if cx_text_only_h[ii] >= boxes[jj][0] and cx_text_only_h[ii] < boxes[jj][1] and cy_text_only_h[ + ii] >= boxes[jj][2] and cy_text_only_h[ii] < boxes[jj][ + 3]: # this is valid if the center of region identify in which box it is located arg_text_con_h.append(jj) break args_contours_h = np.array(range(len(arg_text_con_h))) @@ -1984,9 +2063,13 @@ class Eynollah: for box in args_contours_box_h: con_inter_box_h.append(contours_only_text_parent_h[box]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[int(boxes[iij][2]): int(boxes[iij][3]), int(boxes[iij][0]): int(boxes[iij][1])], + con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, + indexes_sorted, index_by_kind_sorted, + kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] @@ -1995,11 +2078,13 @@ class Eynollah: for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for zahler, _ in enumerate(args_contours_box_h): arg_order_v = indexes_sorted_head[zahler] - order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_head[args_contours_box_h[indexes_by_type_head[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2018,15 +2103,18 @@ class Eynollah: order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) return order_text_new, id_of_texts_tot - def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): + def do_order_of_regions_no_full_layout(self, contours_only_text_parent, contours_only_text_parent_h, boxes, + textline_mask_tot): self.logger.debug("enter do_order_of_regions_no_full_layout") - cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = 
find_new_features_of_contours(contours_only_text_parent) + cx_text_only, cy_text_only, x_min_text_only, _, _, _, y_cor_x_min_main = find_new_features_of_contours( + contours_only_text_parent) try: arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): - if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: + if (x_min_text_only[ii] + 80) >= boxes[jj][0] and (x_min_text_only[ii] + 80) < boxes[jj][1] and \ + y_cor_x_min_main[ii] >= boxes[jj][2] and y_cor_x_min_main[ii] < boxes[jj][3]: arg_text_con.append(jj) break args_contours = np.array(range(len(arg_text_con))) @@ -2042,16 +2130,21 @@ class Eynollah: for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[int(boxes[iij][2]): int(boxes[iij][3]), int(boxes[iij][0]): int(boxes[iij][1])], + con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, + indexes_sorted, index_by_kind_sorted, + kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2065,13 +2158,15 @@ class Eynollah: order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + except Exception as why: self.logger.error(why) arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): - if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located + if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= \ + boxes[jj][2] and cy_text_only[ii] < boxes[jj][ + 3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) break args_contours = np.array(range(len(arg_text_con))) @@ -2089,16 +2184,21 @@ class Eynollah: for i in range(len(args_contours_box)): con_inter_box.append(contours_only_text_parent[args_contours_box[i]]) - indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions(textline_mask_tot[int(boxes[iij][2]) : int(boxes[iij][3]), int(boxes[iij][0]) : int(boxes[iij][1])], con_inter_box, 
con_inter_box_h, boxes[iij][2]) + indexes_sorted, matrix_of_orders, kind_of_texts_sorted, index_by_kind_sorted = order_of_regions( + textline_mask_tot[int(boxes[iij][2]): int(boxes[iij][3]), int(boxes[iij][0]): int(boxes[iij][1])], + con_inter_box, con_inter_box_h, boxes[iij][2]) - order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, indexes_sorted, index_by_kind_sorted, kind_of_texts_sorted, ref_point) + order_of_texts, id_of_texts = order_and_id_of_texts(con_inter_box, con_inter_box_h, matrix_of_orders, + indexes_sorted, index_by_kind_sorted, + kind_of_texts_sorted, ref_point) indexes_sorted_main = np.array(indexes_sorted)[np.array(kind_of_texts_sorted) == 1] indexes_by_type_main = np.array(index_by_kind_sorted)[np.array(kind_of_texts_sorted) == 1] for zahler, _ in enumerate(args_contours_box): arg_order_v = indexes_sorted_main[zahler] - order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = np.where(indexes_sorted == arg_order_v)[0][0] + ref_point + order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = \ + np.where(indexes_sorted == arg_order_v)[0][0] + ref_point for jji, _ in enumerate(id_of_texts): order_of_texts_tot.append(order_of_texts[jji] + ref_point) @@ -2106,325 +2206,371 @@ class Eynollah: ref_point += len(id_of_texts) order_of_texts_tot = [] - + for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) order_text_new = [] for iii in range(len(order_of_texts_tot)): order_text_new.append(np.where(np.array(order_of_texts_tot) == iii)[0][0]) - + return order_text_new, id_of_texts_tot - def check_iou_of_bounding_box_and_contour_for_tables(self, layout, table_prediction_early, pixel_tabel, num_col_classifier): - layout_org = np.copy(layout) - layout_org[:,:,0][layout_org[:,:,0]==pixel_tabel] = 0 - layout = (layout[:,:,0]==pixel_tabel)*1 - layout =np.repeat(layout[:, :, np.newaxis], 3, axis=2) + def check_iou_of_bounding_box_and_contour_for_tables(self, layout, table_prediction_early, pixel_tabel, + num_col_classifier): + layout_org = np.copy(layout) + layout_org[:, :, 0][layout_org[:, :, 0] == pixel_tabel] = 0 + layout = (layout[:, :, 0] == pixel_tabel) * 1 + + layout = np.repeat(layout[:, :, np.newaxis], 3, axis=2) layout = layout.astype(np.uint8) - imgray = cv2.cvtColor(layout, cv2.COLOR_BGR2GRAY ) + imgray = cv2.cvtColor(layout, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - + contours_new = [] for i in range(len(contours)): x, y, w, h = cv2.boundingRect(contours[i]) - iou = cnt_size[i] /float(w*h) *100 - - if iou<80: + iou = cnt_size[i] / float(w * h) * 100 + + if iou < 80: layout_contour = np.zeros((layout_org.shape[0], layout_org.shape[1])) - layout_contour= cv2.fillPoly(layout_contour,pts=[contours[i]] ,color=(1,1,1)) - - + layout_contour = cv2.fillPoly(layout_contour, pts=[contours[i]], color=(1, 1, 1)) + layout_contour_sum = layout_contour.sum(axis=0) layout_contour_sum_diff = np.diff(layout_contour_sum) - layout_contour_sum_diff= np.abs(layout_contour_sum_diff) - layout_contour_sum_diff_smoothed= gaussian_filter1d(layout_contour_sum_diff, 10) + layout_contour_sum_diff = np.abs(layout_contour_sum_diff) + layout_contour_sum_diff_smoothed = gaussian_filter1d(layout_contour_sum_diff, 10) peaks, _ = find_peaks(layout_contour_sum_diff_smoothed, height=0) - peaks= 
peaks[layout_contour_sum_diff_smoothed[peaks]>4] - + peaks = peaks[layout_contour_sum_diff_smoothed[peaks] > 4] + for j in range(len(peaks)): - layout_contour[:,peaks[j]-3+1:peaks[j]+1+3] = 0 - - layout_contour=cv2.erode(layout_contour[:,:], KERNEL, iterations=5) - layout_contour=cv2.dilate(layout_contour[:,:], KERNEL, iterations=5) - - layout_contour =np.repeat(layout_contour[:, :, np.newaxis], 3, axis=2) + layout_contour[:, peaks[j] - 3 + 1: peaks[j] + 1 + 3] = 0 + + layout_contour = cv2.erode(layout_contour[:, :], KERNEL, iterations=5) + layout_contour = cv2.dilate(layout_contour[:, :], KERNEL, iterations=5) + + layout_contour = np.repeat(layout_contour[:, :, np.newaxis], 3, axis=2) layout_contour = layout_contour.astype(np.uint8) - - imgray = cv2.cvtColor(layout_contour, cv2.COLOR_BGR2GRAY ) + + imgray = cv2.cvtColor(layout_contour, cv2.COLOR_BGR2GRAY) _, thresh = cv2.threshold(imgray, 0, 255, 0) contours_sep, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - for ji in range(len(contours_sep) ): + for ji in range(len(contours_sep)): contours_new.append(contours_sep[ji]) - if num_col_classifier>=2: - only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours_sep[ji]] ,color=(1,1,1)) - table_pixels_masked_from_early_pre = only_recent_contour_image[:,:]*table_prediction_early[:,:] - iou_in = table_pixels_masked_from_early_pre.sum() /float(only_recent_contour_image.sum()) *100 - #print(iou_in,'iou_in_in1') - - if iou_in>30: - layout_org= cv2.fillPoly(layout_org,pts=[contours_sep[ji]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) + if num_col_classifier >= 2: + only_recent_contour_image = np.zeros((layout.shape[0], layout.shape[1])) + only_recent_contour_image = cv2.fillPoly(only_recent_contour_image, pts=[contours_sep[ji]], + color=(1, 1, 1)) + table_pixels_masked_from_early_pre = only_recent_contour_image[:, :] * table_prediction_early[:, + :] + iou_in = table_pixels_masked_from_early_pre.sum() / float(only_recent_contour_image.sum()) * 100 + # print(iou_in,'iou_in_in1') + + if iou_in > 30: + layout_org = cv2.fillPoly(layout_org, pts=[contours_sep[ji]], + color=(pixel_tabel, pixel_tabel, pixel_tabel)) else: pass else: - - layout_org= cv2.fillPoly(layout_org,pts=[contours_sep[ji]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) - + + layout_org = cv2.fillPoly(layout_org, pts=[contours_sep[ji]], + color=(pixel_tabel, pixel_tabel, pixel_tabel)) + else: contours_new.append(contours[i]) - if num_col_classifier>=2: - only_recent_contour_image = np.zeros((layout.shape[0],layout.shape[1])) - only_recent_contour_image= cv2.fillPoly(only_recent_contour_image,pts=[contours[i]] ,color=(1,1,1)) - - table_pixels_masked_from_early_pre = only_recent_contour_image[:,:]*table_prediction_early[:,:] - iou_in = table_pixels_masked_from_early_pre.sum() /float(only_recent_contour_image.sum()) *100 - #print(iou_in,'iou_in') - if iou_in>30: - layout_org= cv2.fillPoly(layout_org,pts=[contours[i]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) + if num_col_classifier >= 2: + only_recent_contour_image = np.zeros((layout.shape[0], layout.shape[1])) + only_recent_contour_image = cv2.fillPoly(only_recent_contour_image, pts=[contours[i]], + color=(1, 1, 1)) + + table_pixels_masked_from_early_pre = only_recent_contour_image[:, :] * table_prediction_early[:, :] + iou_in = table_pixels_masked_from_early_pre.sum() / float(only_recent_contour_image.sum()) * 100 + # print(iou_in,'iou_in') + if iou_in > 30: + 
layout_org = cv2.fillPoly(layout_org, pts=[contours[i]], + color=(pixel_tabel, pixel_tabel, pixel_tabel)) else: pass else: - layout_org= cv2.fillPoly(layout_org,pts=[contours[i]] ,color=(pixel_tabel,pixel_tabel,pixel_tabel)) - + layout_org = cv2.fillPoly(layout_org, pts=[contours[i]], + color=(pixel_tabel, pixel_tabel, pixel_tabel)) + return layout_org, contours_new - def delete_separator_around(self,spliter_y,peaks_neg,image_by_region, pixel_line, pixel_table): + + def delete_separator_around(self, spliter_y, peaks_neg, image_by_region, pixel_line, pixel_table): # format of subboxes: box=[x1, x2 , y1, y2] pix_del = 100 - if len(image_by_region.shape)==3: - for i in range(len(spliter_y)-1): - for j in range(1,len(peaks_neg[i])-1): - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0]==pixel_line ]=0 - image_by_region[spliter_y[i]:spliter_y[i+1],peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,1]==pixel_line ]=0 - image_by_region[spliter_y[i]:spliter_y[i+1],peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,2]==pixel_line ]=0 - - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0]==pixel_table ]=0 - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,1]==pixel_table ]=0 - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,0][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del,2]==pixel_table ]=0 + if len(image_by_region.shape) == 3: + for i in range(len(spliter_y) - 1): + for j in range(1, len(peaks_neg[i]) - 1): + image_by_region[int(spliter_y[i]):int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del, 0][ + image_by_region[int(spliter_y[i]): int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del: peaks_neg[i][j] + pix_del, 0] == pixel_line] = 0 + image_by_region[spliter_y[i]:spliter_y[i + 1], peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del, + 0][image_by_region[int(spliter_y[i]): int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del: peaks_neg[i][j] + pix_del, 1] == pixel_line] = 0 + image_by_region[spliter_y[i]:spliter_y[i + 1], peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del, + 0][image_by_region[int(spliter_y[i]): int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del: peaks_neg[i][j] + pix_del, 2] == pixel_line] = 0 + + image_by_region[int(spliter_y[i]):int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del, 0][ + image_by_region[int(spliter_y[i]): int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del: peaks_neg[i][j] + pix_del, 0] == pixel_table] = 0 + image_by_region[int(spliter_y[i]):int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del, 0][ + image_by_region[int(spliter_y[i]): int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del: peaks_neg[i][j] + pix_del, 1] == pixel_table] = 0 + image_by_region[int(spliter_y[i]):int(spliter_y[i + 1]), + peaks_neg[i][j] - 
pix_del:peaks_neg[i][j] + pix_del, 0][ + image_by_region[int(spliter_y[i]): int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del: peaks_neg[i][j] + pix_del, 2] == pixel_table] = 0 else: - for i in range(len(spliter_y)-1): - for j in range(1,len(peaks_neg[i])-1): - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del]==pixel_line ]=0 - - image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del][image_by_region[int(spliter_y[i]):int(spliter_y[i+1]),peaks_neg[i][j]-pix_del:peaks_neg[i][j]+pix_del]==pixel_table ]=0 + for i in range(len(spliter_y) - 1): + for j in range(1, len(peaks_neg[i]) - 1): + image_by_region[int(spliter_y[i]):int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del][ + image_by_region[int(spliter_y[i]):int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del] == pixel_line] = 0 + + image_by_region[int(spliter_y[i]):int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del][ + image_by_region[int(spliter_y[i]):int(spliter_y[i + 1]), + peaks_neg[i][j] - pix_del:peaks_neg[i][j] + pix_del] == pixel_table] = 0 return image_by_region - def add_tables_heuristic_to_layout(self, image_regions_eraly_p,boxes, slope_mean_hor, spliter_y,peaks_neg_tot, image_revised, num_col_classifier, min_area, pixel_line): - pixel_table =10 + + def add_tables_heuristic_to_layout(self, image_regions_eraly_p, boxes, slope_mean_hor, spliter_y, peaks_neg_tot, + image_revised, num_col_classifier, min_area, pixel_line): + pixel_table = 10 image_revised_1 = self.delete_separator_around(spliter_y, peaks_neg_tot, image_revised, pixel_line, pixel_table) - + try: - image_revised_1[:,:30][image_revised_1[:,:30]==pixel_line] = 0 - image_revised_1[:,image_revised_1.shape[1]-30:][image_revised_1[:,image_revised_1.shape[1]-30:]==pixel_line] = 0 + image_revised_1[:, :30][image_revised_1[:, :30] == pixel_line] = 0 + image_revised_1[:, image_revised_1.shape[1] - 30:][ + image_revised_1[:, image_revised_1.shape[1] - 30:] == pixel_line] = 0 except: pass - + img_comm_e = np.zeros(image_revised_1.shape) img_comm = np.repeat(img_comm_e[:, :, np.newaxis], 3, axis=2) for indiv in np.unique(image_revised_1): - image_col=(image_revised_1==indiv)*255 - img_comm_in=np.repeat(image_col[:, :, np.newaxis], 3, axis=2) - img_comm_in=img_comm_in.astype(np.uint8) + image_col = (image_revised_1 == indiv) * 255 + img_comm_in = np.repeat(image_col[:, :, np.newaxis], 3, axis=2) + img_comm_in = img_comm_in.astype(np.uint8) imgray = cv2.cvtColor(img_comm_in, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours,hirarchy=cv2.findContours(thresh.copy(), cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + contours, hirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - if indiv==pixel_table: - main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = 0.001) + if indiv == pixel_table: + main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area=1, + min_area=0.001) else: - main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area = 1, min_area = min_area) + main_contours = filter_contours_area_of_image_tables(thresh, contours, hirarchy, max_area=1, + min_area=min_area) - img_comm = cv2.fillPoly(img_comm, pts = main_contours, color = (indiv, 
indiv, indiv)) + img_comm = cv2.fillPoly(img_comm, pts=main_contours, color=(indiv, indiv, indiv)) img_comm = img_comm.astype(np.uint8) - + if not self.isNaN(slope_mean_hor): - image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1],3)) + image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1], 3)) for i in range(len(boxes)): - image_box=img_comm[int(boxes[i][2]):int(boxes[i][3]),int(boxes[i][0]):int(boxes[i][1]),:] + image_box = img_comm[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]): int(boxes[i][1]), :] try: - image_box_tabels_1=(image_box[:,:,0]==pixel_table)*1 - contours_tab,_=return_contours_of_image(image_box_tabels_1) - contours_tab=filter_contours_area_of_image_tables(image_box_tabels_1,contours_tab,_,1,0.003) - image_box_tabels_1=(image_box[:,:,0]==pixel_line)*1 + image_box_tabels_1 = (image_box[:, :, 0] == pixel_table) * 1 + contours_tab, _ = return_contours_of_image(image_box_tabels_1) + contours_tab = filter_contours_area_of_image_tables(image_box_tabels_1, contours_tab, _, 1, 0.003) + image_box_tabels_1 = (image_box[:, :, 0] == pixel_line) * 1 - image_box_tabels_and_m_text=( (image_box[:,:,0]==pixel_table) | (image_box[:,:,0]==1) )*1 - image_box_tabels_and_m_text=image_box_tabels_and_m_text.astype(np.uint8) + image_box_tabels_and_m_text = ((image_box[:, :, 0] == pixel_table) | (image_box[:, :, 0] == 1)) * 1 + image_box_tabels_and_m_text = image_box_tabels_and_m_text.astype(np.uint8) - image_box_tabels_1=image_box_tabels_1.astype(np.uint8) - image_box_tabels_1 = cv2.dilate(image_box_tabels_1,KERNEL,iterations = 5) + image_box_tabels_1 = image_box_tabels_1.astype(np.uint8) + image_box_tabels_1 = cv2.dilate(image_box_tabels_1, KERNEL, iterations=5) - contours_table_m_text,_=return_contours_of_image(image_box_tabels_and_m_text) - image_box_tabels=np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) + contours_table_m_text, _ = return_contours_of_image(image_box_tabels_and_m_text) + image_box_tabels = np.repeat(image_box_tabels_1[:, :, np.newaxis], 3, axis=2) - image_box_tabels=image_box_tabels.astype(np.uint8) + image_box_tabels = image_box_tabels.astype(np.uint8) imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_line,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + contours_line, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - y_min_main_line ,y_max_main_line=find_features_of_contours(contours_line) - y_min_main_tab ,y_max_main_tab=find_features_of_contours(contours_tab) + y_min_main_line, y_max_main_line = find_features_of_contours(contours_line) + y_min_main_tab, y_max_main_tab = find_features_of_contours(contours_tab) - cx_tab_m_text,cy_tab_m_text ,x_min_tab_m_text , x_max_tab_m_text, y_min_tab_m_text ,y_max_tab_m_text, _= find_new_features_of_contours(contours_table_m_text) - cx_tabl,cy_tabl ,x_min_tabl , x_max_tabl, y_min_tabl ,y_max_tabl,_= find_new_features_of_contours(contours_tab) + cx_tab_m_text, cy_tab_m_text, x_min_tab_m_text, x_max_tab_m_text, y_min_tab_m_text, y_max_tab_m_text, _ = find_new_features_of_contours( + contours_table_m_text) + cx_tabl, cy_tabl, x_min_tabl, x_max_tabl, y_min_tabl, y_max_tabl, _ = find_new_features_of_contours( + contours_tab) - if len(y_min_main_tab )>0: - y_down_tabs=[] - y_up_tabs=[] + if len(y_min_main_tab) > 0: + y_down_tabs = [] + y_up_tabs = [] - for i_t in range(len(y_min_main_tab )): - y_down_tab=[] - y_up_tab=[] + for i_t 
in range(len(y_min_main_tab)): + y_down_tab = [] + y_up_tab = [] for i_l in range(len(y_min_main_line)): - if y_min_main_tab[i_t]>y_min_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l] and y_min_main_tab[i_t]>y_max_main_line[i_l] and y_max_main_tab[i_t]>y_min_main_line[i_l]: + if y_min_main_tab[i_t] > y_min_main_line[i_l] and y_max_main_tab[i_t] > y_min_main_line[ + i_l] and y_min_main_tab[i_t] > y_max_main_line[i_l] and y_max_main_tab[i_t] > \ + y_min_main_line[i_l]: pass - elif y_min_main_tab[i_t]0: + _, _, _, _, y_min_tab_col1, y_max_tab_col1, _ = find_new_features_of_contours(contours_table_col1) + + if len(y_min_tab_col1) > 0: for ijv in range(len(y_min_tab_col1)): - image_revised_last[int(y_min_tab_col1[ijv]):int(y_max_tab_col1[ijv]),:,:]=pixel_table + image_revised_last[int(y_min_tab_col1[ijv]): int(y_max_tab_col1[ijv]), :, :] = pixel_table return image_revised_last + def do_order_of_regions(self, *args, **kwargs): if self.full_layout: return self.do_order_of_regions_full_layout(*args, **kwargs) return self.do_order_of_regions_no_full_layout(*args, **kwargs) - + def get_tables_from_model(self, img, num_col_classifier): img_org = np.copy(img) - + img_height_h = img_org.shape[0] img_width_h = img_org.shape[1] - + model_region, session_region = self.start_new_session_and_model(self.model_tables) - + patches = False - + if num_col_classifier < 4 and num_col_classifier > 2: prediction_table = self.do_prediction(patches, img, model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), model_region) + pre_updown = self.do_prediction(patches, cv2.flip(img[:, :, :], -1), model_region) pre_updown = cv2.flip(pre_updown, -1) - - prediction_table[:,:,0][pre_updown[:,:,0]==1]=1 + + prediction_table[:, :, 0][pre_updown[:, :, 0] == 1] = 1 prediction_table = prediction_table.astype(np.int16) - - elif num_col_classifier ==2: - height_ext = 0#int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/8. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext - - img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + + elif num_col_classifier == 2: + height_ext = 0 # int(img.shape[0]/4.) + h_start = int(height_ext / 2.) + width_ext = int(img.shape[1] / 8.) + w_start = int(width_ext / 2.) 
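# Illustrative sketch (not part of the patch): the pad/predict/crop pattern this
# branch implements. The page is embedded in a horizontally padded canvas so the
# table model also sees blank margins, and the prediction is cropped back to the
# original extent afterwards. `predict` is a hypothetical stand-in for
# self.do_prediction(patches, ..., model_region).
import numpy as np

def pad_predict_crop(img, predict, width_ext):
    w_start = int(width_ext / 2.)
    canvas = np.zeros((img.shape[0], img.shape[1] + width_ext, img.shape[2]), dtype=float)
    canvas[:, w_start:w_start + img.shape[1], :] = img
    prediction = predict(canvas)
    return prediction[:, w_start:w_start + img.shape[1], :]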
+ + height_new = img.shape[0] + height_ext + width_new = img.shape[1] + width_ext + + img_new = np.ones((height_new, width_new, img.shape[2])).astype(float) * 0 + img_new[h_start:h_start + img.shape[0], w_start: w_start + img.shape[1], :] = img[:, :, :] prediction_ext = self.do_prediction(patches, img_new, model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:, :, :], -1), model_region) pre_updown = cv2.flip(pre_updown, -1) - - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - - prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 + + prediction_table = prediction_ext[h_start:h_start + img.shape[0], w_start: w_start + img.shape[1], :] + prediction_table_updown = pre_updown[h_start:h_start + img.shape[0], w_start: w_start + img.shape[1], :] + + prediction_table[:, :, 0][prediction_table_updown[:, :, 0] == 1] = 1 prediction_table = prediction_table.astype(np.int16) - elif num_col_classifier ==1: - height_ext = 0# int( img.shape[0]/4. ) - h_start = int(height_ext/2.) - width_ext = int( img.shape[1]/4. ) - w_start = int(width_ext/2.) - - height_new = img.shape[0]+height_ext - width_new = img.shape[1]+width_ext - - img_new =np.ones((height_new,width_new,img.shape[2])).astype(float)*0 - img_new[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] =img[:,:,:] + elif num_col_classifier == 1: + height_ext = 0 # int(img.shape[0]/4.) + h_start = int(height_ext / 2.) + width_ext = int(img.shape[1] / 4.) + w_start = int(width_ext / 2.) + + height_new = img.shape[0] + height_ext + width_new = img.shape[1] + width_ext + + img_new = np.ones((height_new, width_new, img.shape[2])).astype(float) * 0 + img_new[h_start:h_start + img.shape[0], w_start: w_start + img.shape[1], :] = img[:, :, :] prediction_ext = self.do_prediction(patches, img_new, model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img_new[:,:,:], -1), model_region) + pre_updown = self.do_prediction(patches, cv2.flip(img_new[:, :, :], -1), model_region) pre_updown = cv2.flip(pre_updown, -1) - - prediction_table = prediction_ext[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - prediction_table_updown = pre_updown[h_start:h_start+img.shape[0] ,w_start: w_start+img.shape[1], : ] - - prediction_table[:,:,0][prediction_table_updown[:,:,0]==1]=1 + + prediction_table = prediction_ext[h_start:h_start + img.shape[0], w_start: w_start + img.shape[1], :] + prediction_table_updown = pre_updown[h_start:h_start + img.shape[0], w_start: w_start + img.shape[1], :] + + prediction_table[:, :, 0][prediction_table_updown[:, :, 0] == 1] = 1 prediction_table = prediction_table.astype(np.int16) else: prediction_table = np.zeros(img.shape) - img_w_half = int(img.shape[1]/2.) + img_w_half = int(img.shape[1] / 2.) 
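# Illustrative sketch (not part of the patch): how the half-page, full-page and
# flipped predictions below are combined. Each pass yields a binary table mask;
# the union keeps any pixel marked by at least one pass, and an erode/dilate
# pair (a morphological opening) suppresses thin spurious responses. KERNEL
# stands in for the module-level morphology kernel used throughout this file.
import cv2
import numpy as np

KERNEL = np.ones((5, 5), np.uint8)

def merge_table_masks(masks, iterations=4):
    union = np.zeros_like(masks[0], dtype=np.uint8)
    for mask in masks:
        union[mask == 1] = 1
    union = cv2.erode(union, KERNEL, iterations=iterations)
    return cv2.dilate(union, KERNEL, iterations=iterations)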
- pre1 = self.do_prediction(patches, img[:,0:img_w_half,:], model_region) - pre2 = self.do_prediction(patches, img[:,img_w_half:,:], model_region) - pre_full = self.do_prediction(patches, img[:,:,:], model_region) - pre_updown = self.do_prediction(patches, cv2.flip(img[:,:,:], -1), model_region) + pre1 = self.do_prediction(patches, img[:, 0:img_w_half, :], model_region) + pre2 = self.do_prediction(patches, img[:, img_w_half:, :], model_region) + pre_full = self.do_prediction(patches, img[:, :, :], model_region) + pre_updown = self.do_prediction(patches, cv2.flip(img[:, :, :], -1), model_region) pre_updown = cv2.flip(pre_updown, -1) - - prediction_table_full_erode = cv2.erode(pre_full[:,:,0], KERNEL, iterations=4) + + prediction_table_full_erode = cv2.erode(pre_full[:, :, 0], KERNEL, iterations=4) prediction_table_full_erode = cv2.dilate(prediction_table_full_erode, KERNEL, iterations=4) - - prediction_table_full_updown_erode = cv2.erode(pre_updown[:,:,0], KERNEL, iterations=4) + + prediction_table_full_updown_erode = cv2.erode(pre_updown[:, :, 0], KERNEL, iterations=4) prediction_table_full_updown_erode = cv2.dilate(prediction_table_full_updown_erode, KERNEL, iterations=4) - prediction_table[:,0:img_w_half,:] = pre1[:,:,:] - prediction_table[:,img_w_half:,:] = pre2[:,:,:] - - prediction_table[:,:,0][prediction_table_full_erode[:,:]==1]=1 - prediction_table[:,:,0][prediction_table_full_updown_erode[:,:]==1]=1 + prediction_table[:, 0:img_w_half, :] = pre1[:, :, :] + prediction_table[:, img_w_half:, :] = pre2[:, :, :] + + prediction_table[:, :, 0][prediction_table_full_erode[:, :] == 1] = 1 + prediction_table[:, :, 0][prediction_table_full_updown_erode[:, :] == 1] = 1 prediction_table = prediction_table.astype(np.int16) - - #prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6) - #prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6) - - prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) + + # prediction_table_erode = cv2.erode(prediction_table[:,:,0], self.kernel, iterations=6) + # prediction_table_erode = cv2.dilate(prediction_table_erode, self.kernel, iterations=6) + + prediction_table_erode = cv2.erode(prediction_table[:, :, 0], KERNEL, iterations=20) prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) - def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts): + + def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, + num_column_is_classified, erosion_hurts): img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) @@ -2434,17 +2580,17 @@ class Eynollah: img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() - + if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: table_prediction = (np.zeros((image_page.shape[0], image_page.shape[1]))).astype(np.int16) - + if self.plotter: self.plotter.save_page_image(image_page) - text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + text_regions_p_1 = text_regions_p_1[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] + textline_mask_tot_ea = 
textline_mask_tot_ea[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) @@ -2452,17 +2598,16 @@ class Eynollah: mask_lines = mask_lines.astype(np.uint8) img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - - + if erosion_hurts: - img_only_regions = np.copy(img_only_regions_with_sep[:,:]) + img_only_regions = np.copy(img_only_regions_with_sep[:, :]) else: - img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) - - ##print(img_only_regions.shape,'img_only_regions') - ##plt.imshow(img_only_regions[:,:]) - ##plt.show() - ##num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) + img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=6) + + # print(img_only_regions.shape,'img_only_regions') + # plt.imshow(img_only_regions[:,:]) + # plt.show() + # num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2472,6 +2617,7 @@ class Eynollah: self.logger.error(why) num_col = None return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea + def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): img_g = self.imread(grayscale=True, uint8=True) @@ -2482,16 +2628,16 @@ class Eynollah: img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() - + if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: table_prediction = (np.zeros((image_page.shape[0], image_page.shape[1]))).astype(np.int16) - + if self.plotter: self.plotter.save_page_image(image_page) - text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + text_regions_p_1 = text_regions_p_1[page_coord[0]: page_coord[1], page_coord[2]: page_coord[3]] mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) @@ -2499,14 +2645,12 @@ class Eynollah: mask_lines = mask_lines.astype(np.uint8) img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - - + if erosion_hurts: - img_only_regions = np.copy(img_only_regions_with_sep[:,:]) + img_only_regions = np.copy(img_only_regions_with_sep[:, :]) else: - img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) - - + img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=6) + try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2517,9 +2661,10 @@ class Eynollah: num_col = None return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction - def run_enhancement(self,light_version): + def run_enhancement(self, light_version): self.logger.info("Resizing and enhancing image...") - is_image_enhanced, 
img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) + is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier( + light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') scale = 1 @@ -2537,7 +2682,8 @@ class Eynollah: else: self.get_image_and_scales(img_org, img_res, scale) if self.allow_scaling: - img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) + img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, + img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified @@ -2555,7 +2701,8 @@ class Eynollah: def run_deskew(self, textline_mask_tot_ea): sigma = 2 main_page_deskew = True - slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, + main_page_deskew, plotter=self.plotter) slope_first = 0 if self.plotter: @@ -2563,7 +2710,8 @@ class Eynollah: self.logger.info("slope_deskew: %.2f°", slope_deskew) return slope_deskew, slope_first - def run_marginals(self, image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction): + def run_marginals(self, image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, + text_regions_p_1, table_prediction): image_page_rotated, textline_mask_tot = image_page[:, :], textline_mask_tot_ea[:, :] textline_mask_tot[mask_images[:, :] == 1] = 0 @@ -2575,9 +2723,10 @@ class Eynollah: try: regions_without_separators = (text_regions_p[:, :] == 1) * 1 if self.tables: - regions_without_separators[table_prediction==1] = 1 + regions_without_separators[table_prediction == 1] = 1 regions_without_separators = regions_without_separators.astype(np.uint8) - text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, kernel=KERNEL) + text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, + num_col_classifier, slope_deskew, kernel=KERNEL) except Exception as e: self.logger.error("exception %s", e) @@ -2586,29 +2735,38 @@ class Eynollah: self.plotter.save_plot_of_layout_main(text_regions_p, image_page) return textline_mask_tot, text_regions_p, image_page_rotated - def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts): + def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, + table_prediction, erosion_hurts): self.logger.debug('enter run_boxes_no_full_layout') if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, + textline_mask_tot, + text_regions_p, + table_prediction, + slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], 
text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1]) regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 if self.tables: - regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 - regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + regions_without_separators_d[table_prediction_n[:, :] == 1] = 1 + regions_without_separators = (text_regions_p[:, + :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) if self.tables: - regions_without_separators[table_prediction ==1 ] = 1 + regions_without_separators[table_prediction == 1] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: - _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, + pixel_lines) self.logger.info("num_col_classifier: %s", num_col_classifier) @@ -2621,149 +2779,194 @@ class Eynollah: regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, + regions_without_separators, + matrix_of_lines_ch, + num_col_classifier, + erosion_hurts, self.tables, + self.right2left) boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) - + text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + text_regions_p_tables[:, :][(table_prediction[:, :] == 1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, + peaks_neg_tot_tables, text_regions_p_tables, + num_col_classifier, 0.000005, pixel_line) + img_revised_tab2, contoures_tables = 
self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, + table_prediction, + 10, + num_col_classifier) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, + regions_without_separators_d, + matrix_of_lines_ch_d, + num_col_classifier, + erosion_hurts, self.tables, + self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) - + text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 - + text_regions_p_tables = np.round(text_regions_p_tables) + text_regions_p_tables[:, :][(text_regions_p_tables[:, :] != 3) & (table_prediction_n[:, :] == 1)] = 10 + pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) - + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes_d, 0, splitter_y_new_d, + peaks_neg_tot_tables_d, text_regions_p_tables, + num_col_classifier, 0.000005, pixel_line) + img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, + table_prediction_n, 10, + num_col_classifier) + img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], + text_regions_p.shape[1]) self.logger.info("detecting boxes took %.1fs", time.time() - t1) - + if self.tables: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) - img_revised_tab[:,:][(text_regions_p[:,:] == 1) & (img_revised_tab[:,:] != 10)] = 1 + img_revised_tab = np.copy(img_revised_tab2[:, :, 0]) + img_revised_tab[:, :][(text_regions_p[:, :] == 1) & (img_revised_tab[:, :] != 10)] = 1 else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 - - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 + img_revised_tab = np.copy(text_regions_p[:, :]) + img_revised_tab[:, :][img_revised_tab[:, :] == 10] = 0 + img_revised_tab[:, :][img_revised_tab2_d_rotated[:, :, 0] == 10] = 10 + + text_regions_p[:, :][text_regions_p[:, :] == 10] = 0 + text_regions_p[:, :][img_revised_tab[:, :] == 10] = 10 else: - img_revised_tab=text_regions_p[:,:] - #img_revised_tab = text_regions_p[:, :] + img_revised_tab = text_regions_p[:, :] + # img_revised_tab = text_regions_p[:, :] polygons_of_images = return_contours_of_interested_region(img_revised_tab, 2) pixel_img = 4 min_area_mar = 0.00001 polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, 
min_area_mar) - + pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables - def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts): + def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, + img_only_regions, table_prediction, erosion_hurts): self.logger.debug('enter run_boxes_full_layout') - + if self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) - - text_regions_p_1_n = resize_image(text_regions_p_1_n,text_regions_p.shape[0],text_regions_p.shape[1]) - textline_mask_tot_d = resize_image(textline_mask_tot_d,text_regions_p.shape[0],text_regions_p.shape[1]) - table_prediction_n = resize_image(table_prediction_n,text_regions_p.shape[0],text_regions_p.shape[1]) - - regions_without_separators_d=(text_regions_p_1_n[:,:] == 1)*1 - regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 + image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func( + image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) + + text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) + textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], + text_regions_p.shape[1]) + table_prediction_n = resize_image(table_prediction_n, text_regions_p.shape[0], text_regions_p.shape[1]) + + regions_without_separators_d = (text_regions_p_1_n[:, :] == 1) * 1 + regions_without_separators_d[table_prediction_n[:, :] == 1] = 1 else: text_regions_p_1_n = None textline_mask_tot_d = None regions_without_separators_d = None - - regions_without_separators = (text_regions_p[:,:] == 1)*1#( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) + + regions_without_separators = (text_regions_p[:, + :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_seperators_new(text_regions_p[:,:,0],img_only_regions) regions_without_separators[table_prediction == 1] = 1 - - pixel_lines=3 + + pixel_lines = 3 if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - + num_col, peaks_neg_fin, matrix_of_lines_ch, splitter_y_new, seperators_closeup_n = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, + pixel_lines) + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2),num_col_classifier, self.tables, pixel_lines) + num_col_d, peaks_neg_fin_d, matrix_of_lines_ch_d, splitter_y_new_d, seperators_closeup_n_d 
= find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, + pixel_lines) - if num_col_classifier>=3: + if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:,:], KERNEL, iterations=6) - + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:,:], KERNEL, iterations=6) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) else: pass - + if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, + regions_without_separators, + matrix_of_lines_ch, + num_col_classifier, + erosion_hurts, self.tables, + self.right2left) text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:]==1)] = 10 + text_regions_p_tables[:, :][(table_prediction[:, :] == 1)] = 10 pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - - img_revised_tab2,contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction, 10, num_col_classifier) - + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, + peaks_neg_tot_tables, text_regions_p_tables, + num_col_classifier, 0.000005, pixel_line) + + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables( + img_revised_tab2, table_prediction, 10, num_col_classifier) + else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, + regions_without_separators_d, + matrix_of_lines_ch_d, + num_col_classifier, + erosion_hurts, + self.tables, + self.right2left) text_regions_p_tables = np.copy(text_regions_p_1_n) text_regions_p_tables = np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:]!=3) & (table_prediction_n[:,:]==1)] = 10 - + text_regions_p_tables[:, :][(text_regions_p_tables[:, :] != 3) & (table_prediction_n[:, :] == 1)] = 10 + pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, table_prediction_n, 10, num_col_classifier) + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes_d, 0, + splitter_y_new_d, peaks_neg_tot_tables_d, + text_regions_p_tables, num_col_classifier, + 0.000005, pixel_line) + + 
img_revised_tab2_d, _ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2, + table_prediction_n, 10, + num_col_classifier) img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], + text_regions_p.shape[1]) if np.abs(slope_deskew) < 0.13: - img_revised_tab = np.copy(img_revised_tab2[:,:,0]) + img_revised_tab = np.copy(img_revised_tab2[:, :, 0]) else: - img_revised_tab = np.copy(text_regions_p[:,:]) - img_revised_tab[:,:][img_revised_tab[:,:] == 10] = 0 - img_revised_tab[:,:][img_revised_tab2_d_rotated[:,:,0] == 10] = 10 - - - ##img_revised_tab=img_revised_tab2[:,:,0] - #img_revised_tab=text_regions_p[:,:] - text_regions_p[:,:][text_regions_p[:,:]==10] = 0 - text_regions_p[:,:][img_revised_tab[:,:]==10] = 10 - #img_revised_tab[img_revised_tab2[:,:,0]==10] =10 - + img_revised_tab = np.copy(text_regions_p[:, :]) + img_revised_tab[:, :][img_revised_tab[:, :] == 10] = 0 + img_revised_tab[:, :][img_revised_tab2_d_rotated[:, :, 0] == 10] = 10 + + # img_revised_tab=img_revised_tab2[:,:,0] + # img_revised_tab=text_regions_p[:,:] + text_regions_p[:, :][text_regions_p[:, :] == 10] = 0 + text_regions_p[:, :][img_revised_tab[:, :] == 10] = 10 + # img_revised_tab[img_revised_tab2[:,:,0]==10] =10 + pixel_img = 4 min_area_mar = 0.00001 polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + # set first model with second model text_regions_p[:, :][text_regions_p[:, :] == 2] = 5 text_regions_p[:, :][text_regions_p[:, :] == 3] = 6 @@ -2772,7 +2975,7 @@ class Eynollah: image_page = image_page.astype(np.uint8) regions_fully, regions_fully_only_drop = self.extract_text_regions(image_page, True, cols=num_col_classifier) - text_regions_p[:,:][regions_fully[:,:,0]==6]=6 + text_regions_p[:, :][regions_fully[:, :, 0] == 6] = 6 regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 @@ -2783,16 +2986,21 @@ class Eynollah: else: regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) - regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) + regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, + img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4 text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 - #plt.imshow(text_regions_p) - #plt.show() - ####if not self.tables: + # plt.imshow(text_regions_p) + # plt.show() + # if not self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew) + _, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, + textline_mask_tot, + text_regions_p, + regions_fully, + slope_deskew) 
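# Illustrative sketch (not part of the patch): the deskew round trip used in
# this method. Layout maps are computed on the deskewed page and resized back to
# the original page shape (next lines); later, results are rotated by
# -slope_deskew and resized again so they align pixel-for-pixel with the
# original image. rotate_image/resize_image mirror the helpers this module
# already imports.
import numpy as np

def map_back_to_original(layout_map, slope_deskew, rotate_image, resize_image, orig_shape):
    back = rotate_image(layout_map, -slope_deskew)
    back = np.round(back).astype(np.int8)
    return resize_image(back, orig_shape[0], orig_shape[1])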
text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) textline_mask_tot_d = resize_image(textline_mask_tot_d, text_regions_p.shape[0], text_regions_p.shape[1]) @@ -2809,13 +3017,14 @@ class Eynollah: polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables - + def our_load_model(self, model_file): - + try: model = load_model(model_file, compile=False) except: - model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) + model = load_model(model_file, compile=False, + custom_objects={"PatchEncoder": PatchEncoder, "Patches": Patches}) return model @@ -2825,42 +3034,48 @@ class Eynollah: """ self.logger.debug("enter run") - t0_tot = time.time() if not self.dir_in: self.ls_imgs = [1] - + for img_name in self.ls_imgs: t0 = time.time() if self.dir_in: - self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) - - img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) + self.reset_file_name_dir(os.path.join(self.dir_in, img_name)) + + img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement( + self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - + t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_light_v( + img_res, is_image_enhanced, num_col_classifier) slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + # self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, + num_column_is_classified, erosion_hurts) + # self.logger.info("run graphics %.1fs ", time.time() - t1t) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + text_regions_p_1, erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, + is_image_enhanced, + num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) t1 = time.time() num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, + erosion_hurts) self.logger.info("Graphics detection took %.1fs ", 
time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - + # self.logger.info('cont_page %s', cont_page) + if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], []) + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], + cont_page, [], []) self.logger.info("Job done in %.1fs", time.time() - t1) if self.dir_in: self.writer.write_pagexml(pcgts) @@ -2877,36 +3092,45 @@ class Eynollah: slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) self.logger.info("deskewing took %.1fs", time.time() - t1) t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() + # plt.imshow(table_prediction) + # plt.show() - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, + mask_images, mask_lines, + num_col_classifier, slope_deskew, + text_regions_p_1, + table_prediction) self.logger.info("detection of marginals took %.1fs", time.time() - t1) t1 = time.time() if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout( + image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, + erosion_hurts) if self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts) - text_only = ((img_revised_tab[:, :] == 1)) * 1 + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout( + image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, + table_prediction, erosion_hurts) + text_only = (img_revised_tab[:, :] == 1) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - + text_only_d = (text_regions_p_1_n[:, :] == 1) * 1 + min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - + if len(contours_only_text_parent) > 0: areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) + # self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = 
contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if + areas_cnt_text[jz] > min_con_area] areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + contours_only_text_parent = list( + np.array(contours_only_text_parent, dtype=object)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) @@ -2917,27 +3141,34 @@ class Eynollah: areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: + + if len(areas_cnt_text_d) > 0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + contours_only_text_parent_d = list( + np.array(contours_only_text_parent_d, dtype=object)[index_con_parents_d]) areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours( + [contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours( + contours_only_text_parent_d) try: if len(cx_bigest_d) >= 5: cx_bigest_d_last5 = cx_bigest_d[-5:] cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + ( + cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in + range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - 5 + np.argmin(dists_d) else: cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + ( + cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in + range(len(cy_biggest_d_last5))] ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) - + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] cy_biggest_d_big[0] = cy_biggest_d[ind_largest] except Exception as why: @@ -2956,7 +3187,8 @@ class Eynollah: p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) p[0] = p[0] - x_diff[0] p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in + range(len(cx_bigest_d))] 
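# Illustrative sketch (not part of the patch): the nearest-centroid matching
# performed in the loop above. An original-frame centroid is mapped into the
# deskewed frame via the 2x2 rotation matrix M_22 and the translation
# (x_diff, y_diff), and the deskewed region whose centroid lies closest is taken
# as its counterpart. Scalar x_diff/y_diff are assumed here, where the code
# above indexes x_diff[0] and y_diff[0].
import math
import numpy as np

def match_region(cx, cy, M_22, x_diff, y_diff, cx_d, cy_d):
    p = np.dot(M_22, [cx, cy])
    p[0] -= x_diff
    p[1] -= y_diff
    dists = [math.sqrt((p[0] - cx_d[j]) ** 2 + (p[1] - cy_d[j]) ** 2)
             for j in range(len(cx_d))]
    return int(np.argmin(dists))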
contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) @@ -2966,7 +3198,7 @@ class Eynollah: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - + else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] @@ -2974,88 +3206,137 @@ class Eynollah: else: contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - + if len(contours_only_text_parent) > 0: areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if + areas_cnt_text[jz] > min_con_area] areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + contours_only_text_parent = list( + np.array(contours_only_text_parent, dtype=object)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) else: pass if self.light_version: - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, + slope_first) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - + if not self.curved_line: if self.light_version: if self.textline_light: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, 
image_page_rotated, + boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, + boxes_marginals, slope_deskew) else: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, + boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, + boxes_marginals, slope_deskew) else: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new( + txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, + slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new( + polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, + boxes_marginals, slope_deskew) else: - + scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = 
small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
-
+                all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(
+                    txt_con_org, contours_only_text_parent,
+                    cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text,
+                    text_only, num_col_classifier, scale_param, slope_deskew)
+                all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons,
+                                                                                   textline_mask_tot_ea,
+                                                                                   num_col_classifier)
+                all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(
+                    polygons_of_marginals, polygons_of_marginals,
+                    cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals,
+                    text_only, num_col_classifier, scale_param, slope_deskew)
+                all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(
+                    all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
+
            if self.full_layout:
                if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
-                    contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
+                    contours_only_text_parent_d_ordered = list(
+                        np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
                if self.light_version:
-                    text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
+                    text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(
+                        text_regions_p, regions_fully, contours_only_text_parent, all_box_coord,
+                        all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
                else:
-                    text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
+                    text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(
+                        text_regions_p, regions_fully, contours_only_text_parent, all_box_coord,
+                        all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered)
            else:
-                #takes long timee
+                # takes long time
                contours_only_text_parent_d_ordered = None
                if self.light_version:
-                    text_regions_p, contours_only_text_parent, 
contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light( + text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, + all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header( + text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, + all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) if self.plotter: self.plotter.save_plot_of_layout(text_regions_p, image_page) self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - + pixel_img = 4 polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, + polygons_of_drop_capitals, + contours_only_text_parent, + contours_only_text_parent_h, + all_box_coord, + all_box_coord_h, + all_found_textline_polygons, + all_found_textline_polygons_h, + kernel=KERNEL, + curved_line=self.curved_line) pixel_lines = 6 - if not self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, + pixel_lines, contours_only_text_parent_h) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + _, _, matrix_of_lines_ch_d, 
splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, + pixel_lines, contours_only_text_parent_h_d_ordered) elif self.headers_off: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, + pixel_lines) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document( + np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, + pixel_lines) if num_col_classifier >= 3: if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3064,44 +3345,79 @@ class Eynollah: else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, + iterations=6) if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, + regions_without_separators, + matrix_of_lines_ch, + num_col_classifier, + erosion_hurts, + self.tables, + self.right2left) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - - #print(boxes_d,'boxes_d') - #img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) - #for box_i in boxes_d: - #img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1 - #plt.imshow(img_once) - #plt.show() - #print(np.unique(img_once),'img_once') + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, + regions_without_separators_d, + matrix_of_lines_ch_d, + num_col_classifier, + erosion_hurts, + self.tables, + self.right2left) + + # print(boxes_d,'boxes_d') + # img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) + # for box_i in boxes_d: + # img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1 + # plt.imshow(img_once) + # plt.show() + # print(np.unique(img_once),'img_once') if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() if self.full_layout: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, + contours_only_text_parent_h, boxes, + textline_mask_tot) else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, 
contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, + contours_only_text_parent_h_d_ordered, + boxes_d, textline_mask_tot_d) + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, + page_coord, order_text_new, id_of_texts_tot, + all_found_textline_polygons, + all_found_textline_polygons_h, all_box_coord, + all_box_coord_h, polygons_of_images, contours_tables, + polygons_of_drop_capitals, polygons_of_marginals, + all_found_textline_polygons_marginals, + all_box_coord_marginals, slopes, slopes_h, + slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + # return pcgts else: contours_only_text_parent_h = None if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, + contours_only_text_parent_h, boxes, + textline_mask_tot) else: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) + contours_only_text_parent_d_ordered = list( + np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, + contours_only_text_parent_h, boxes_d, + textline_mask_tot_d) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, + id_of_texts_tot, all_found_textline_polygons, + all_box_coord, polygons_of_images, + polygons_of_marginals, + all_found_textline_polygons_marginals, + all_box_coord_marginals, slopes, slopes_marginals, + cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + # return pcgts self.writer.write_pagexml(pcgts) - #self.logger.info("Job done in %.1fs", time.time() - t0) + # self.logger.info("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) From 1a95bca22dcdda46b689816ffc0d8da052f43530 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 23 Feb 2024 22:44:27 +0100 Subject: [PATCH 07/34] more code formatting --- eynollah/eynollah/cli.py | 5 +- eynollah/eynollah/ocrd_cli.py | 2 + eynollah/eynollah/plot.py | 91 ++- 
eynollah/eynollah/processor.py | 1 + ..._model_load_pretrained_weights_and_save.py | 16 +- eynollah/eynollah/train/metrics.py | 189 +++-- eynollah/eynollah/train/models.py | 237 +++--- eynollah/eynollah/train/pagexml2img.py | 361 +++++---- eynollah/eynollah/train/train.py | 277 +++---- eynollah/eynollah/train/utils.py | 761 +++++++++--------- eynollah/eynollah/utils/contour.py | 74 +- eynollah/eynollah/utils/counter.py | 1 + eynollah/eynollah/utils/drop_capitals.py | 203 ++--- eynollah/eynollah/utils/marginals.py | 304 ++++--- eynollah/eynollah/utils/pil_cv2.py | 5 +- eynollah/eynollah/utils/resize.py | 1 + eynollah/eynollah/utils/rotate.py | 12 +- eynollah/eynollah/utils/xml.py | 3 + eynollah/eynollah/writer.py | 189 +++-- tests/base.py | 5 +- tests/test_counter.py | 4 + tests/test_dpi.py | 2 + tests/test_run.py | 2 + tests/test_xml.py | 2 + 24 files changed, 1418 insertions(+), 1329 deletions(-) diff --git a/eynollah/eynollah/cli.py b/eynollah/eynollah/cli.py index b720d83..94bf211 100644 --- a/eynollah/eynollah/cli.py +++ b/eynollah/eynollah/cli.py @@ -199,8 +199,9 @@ def main( ignore_page_extraction=ignore_page_extraction, ) eynollah.run() - #pcgts = eynollah.run() - ##eynollah.writer.write_pagexml(pcgts) + # pcgts = eynollah.run() + # eynollah.writer.write_pagexml(pcgts) + if __name__ == "__main__": main() diff --git a/eynollah/eynollah/ocrd_cli.py b/eynollah/eynollah/ocrd_cli.py index 8929927..499661b 100644 --- a/eynollah/eynollah/ocrd_cli.py +++ b/eynollah/eynollah/ocrd_cli.py @@ -2,10 +2,12 @@ from .processor import EynollahProcessor from click import command from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor + @command() @ocrd_cli_options def main(*args, **kwargs): return ocrd_cli_wrap_processor(EynollahProcessor, *args, **kwargs) + if __name__ == '__main__': main() diff --git a/eynollah/eynollah/plot.py b/eynollah/eynollah/plot.py index b01fc04..ae85f2a 100644 --- a/eynollah/eynollah/plot.py +++ b/eynollah/eynollah/plot.py @@ -9,24 +9,25 @@ from .utils import crop_image_inside_box from .utils.rotate import rotate_image_different from .utils.resize import resize_image + class EynollahPlotter(): """ Class collecting all the plotting and image writing methods """ def __init__( - self, - *, - dir_out, - dir_of_all, - dir_save_page, - dir_of_deskewed, - dir_of_layout, - dir_of_cropped_images, - image_filename_stem, - image_org=None, - scale_x=1, - scale_y=1, + self, + *, + dir_out, + dir_of_all, + dir_save_page, + dir_of_deskewed, + dir_of_layout, + dir_of_cropped_images, + image_filename_stem, + image_org=None, + scale_x=1, + scale_y=1, ): self.dir_out = dir_out self.dir_of_all = dir_of_all @@ -44,22 +45,23 @@ class EynollahPlotter(): if self.dir_of_layout is not None: values = np.unique(text_regions_p[:, :]) # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] - pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia'] + pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia'] values_indexes = [0, 1, 2, 3, 4] plt.figure(figsize=(40, 40)) plt.rcParams["font.size"] = "40" im = plt.imshow(text_regions_p[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == 
i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40) plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout_main.png")) - def save_plot_of_layout_main_all(self, text_regions_p, image_page): if self.dir_of_all is not None: values = np.unique(text_regions_p[:, :]) # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] - pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia'] + pixels = ['Background', 'Main text', 'Image', 'Separator', 'Marginalia'] values_indexes = [0, 1, 2, 3, 4] plt.figure(figsize=(80, 40)) plt.rcParams["font.size"] = "40" @@ -68,7 +70,9 @@ class EynollahPlotter(): plt.subplot(1, 2, 2) im = plt.imshow(text_regions_p[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_main_and_page.png")) @@ -82,7 +86,9 @@ class EynollahPlotter(): plt.rcParams["font.size"] = "40" im = plt.imshow(text_regions_p[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40) plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout.png")) @@ -99,7 +105,9 @@ class EynollahPlotter(): plt.subplot(1, 2, 2) im = plt.imshow(text_regions_p[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_and_page.png")) @@ -115,7 +123,9 @@ class EynollahPlotter(): plt.subplot(1, 2, 2) im = plt.imshow(textline_mask_tot_ea[:, :]) colors = [im.cmap(im.norm(value)) for value in values] - patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] + patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], + label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in + values] plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_textline_and_page.png")) @@ -131,33 +141,36 @@ class EynollahPlotter(): cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + 
"_page.png"), image_page) if self.dir_save_page is not None: cv2.imwrite(os.path.join(self.dir_save_page, self.image_filename_stem + "_page.png"), image_page) + def save_enhanced_image(self, img_res): cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem + "_enhanced.png"), img_res) - + def save_plot_of_textline_density(self, img_patch_org): if self.dir_of_all is not None: - plt.figure(figsize=(80,40)) - plt.rcParams['font.size']='50' - plt.subplot(1,2,1) + plt.figure(figsize=(80, 40)) + plt.rcParams['font.size'] = '50' + plt.subplot(1, 2, 1) plt.imshow(img_patch_org) - plt.subplot(1,2,2) - plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8) - plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60) - plt.ylabel('Height',fontsize=60) - plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))]) + plt.subplot(1, 2, 2) + plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3), + np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))), linewidth=8) + plt.xlabel('Density of textline prediction in direction of X axis', fontsize=60) + plt.ylabel('Height', fontsize=60) + plt.yticks([0, len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))]) plt.gca().invert_yaxis() - plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_density_of_textline.png')) + plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + '_density_of_textline.png')) def save_plot_of_rotation_angle(self, angels, var_res): if self.dir_of_all is not None: - plt.figure(figsize=(60,30)) - plt.rcParams['font.size']='50' - plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4) - plt.xlabel('angle',fontsize=50) - plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50) - plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$') + plt.figure(figsize=(60, 30)) + plt.rcParams['font.size'] = '50' + plt.plot(angels, np.array(var_res), '-o', markersize=25, linewidth=4) + plt.xlabel('angle', fontsize=50) + plt.ylabel('variance of sum of rotated textline in direction of x axis', fontsize=50) + plt.plot(angels[np.argmax(var_res)], var_res[np.argmax(np.array(var_res))], '*', markersize=50, + label='Angle of deskewing=' + str("{:.2f}".format(angels[np.argmax(var_res)])) + r'$\degree$') plt.legend(loc='best') - plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_rotation_angle.png')) + plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + '_rotation_angle.png')) def write_images_into_directory(self, img_contours, image_page): if self.dir_of_cropped_images is not None: @@ -167,9 +180,9 @@ class EynollahPlotter(): box = [x, y, w, h] croped_page, page_coord = crop_image_inside_box(box, image_page) - croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x)) + croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), + int(croped_page.shape[1] / self.scale_x)) path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg") cv2.imwrite(path, croped_page) index += 1 - diff --git a/eynollah/eynollah/processor.py b/eynollah/eynollah/processor.py index ccec456..042d081 100644 --- a/eynollah/eynollah/processor.py +++ b/eynollah/eynollah/processor.py @@ -22,6 +22,7 @@ from 
.utils.pil_cv2 import pil2cv OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) + class EynollahProcessor(Processor): def __init__(self, *args, **kwargs): diff --git a/eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py b/eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py index 251e698..40cc1b6 100644 --- a/eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py +++ b/eynollah/eynollah/train/build_model_load_pretrained_weights_and_save.py @@ -1,7 +1,7 @@ import os import sys import tensorflow as tf -import keras , warnings +import keras, warnings from keras.optimizers import * from sacred import Experiment from models import * @@ -9,25 +9,21 @@ from utils import * from metrics import * - - def configuration(): gpu_options = tf.compat.v1.GPUOptions(allow_growth=True) session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) -if __name__=='__main__': +if __name__ == '__main__': n_classes = 2 input_height = 224 input_width = 448 weight_decay = 1e-6 pretraining = False dir_of_weights = 'model_bin_sbb_ens.h5' - - #configuration() - - model = resnet50_unet(n_classes, input_height, input_width,weight_decay,pretraining) + + # configuration() + + model = resnet50_unet(n_classes, input_height, input_width, weight_decay, pretraining) model.load_weights(dir_of_weights) model.save('./name_in_another_python_version.h5') - - diff --git a/eynollah/eynollah/train/metrics.py b/eynollah/eynollah/train/metrics.py index c63cc22..9d41d9e 100644 --- a/eynollah/eynollah/train/metrics.py +++ b/eynollah/eynollah/train/metrics.py @@ -2,8 +2,8 @@ from keras import backend as K import tensorflow as tf import numpy as np -def focal_loss(gamma=2., alpha=4.): +def focal_loss(gamma=2., alpha=4.): gamma = float(gamma) alpha = float(alpha) @@ -37,8 +37,10 @@ def focal_loss(gamma=2., alpha=4.): fl = tf.multiply(alpha, tf.multiply(weight, ce)) reduced_fl = tf.reduce_max(fl, axis=1) return tf.reduce_mean(reduced_fl) + return focal_loss_fixed + def weighted_categorical_crossentropy(weights=None): """ weighted_categorical_crossentropy @@ -50,90 +52,102 @@ def weighted_categorical_crossentropy(weights=None): def loss(y_true, y_pred): labels_floats = tf.cast(y_true, tf.float32) - per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred) - + per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats, logits=y_pred) + if weights is not None: weight_mask = tf.maximum(tf.reduce_max(tf.constant( np.array(weights, dtype=np.float32)[None, None, None]) - * labels_floats, axis=-1), 1.0) + * labels_floats, axis=-1), 1.0) per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] return tf.reduce_mean(per_pixel_loss) + return loss + + def image_categorical_cross_entropy(y_true, y_pred, weights=None): """ :param y_true: tensor of shape (batch_size, height, width) representing the ground truth. :param y_pred: tensor of shape (batch_size, height, width) representing the prediction. :return: The mean cross-entropy on softmaxed tensors. 
""" - + labels_floats = tf.cast(y_true, tf.float32) - per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred) - + per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats, logits=y_pred) + if weights is not None: weight_mask = tf.maximum( - tf.reduce_max(tf.constant( - np.array(weights, dtype=np.float32)[None, None, None]) - * labels_floats, axis=-1), 1.0) + tf.reduce_max(tf.constant( + np.array(weights, dtype=np.float32)[None, None, None]) + * labels_floats, axis=-1), 1.0) per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] - + return tf.reduce_mean(per_pixel_loss) + + def class_tversky(y_true, y_pred): - smooth = 1.0#1.00 + smooth = 1.0 # 1.00 - y_true = K.permute_dimensions(y_true, (3,1,2,0)) - y_pred = K.permute_dimensions(y_pred, (3,1,2,0)) + y_true = K.permute_dimensions(y_true, (3, 1, 2, 0)) + y_pred = K.permute_dimensions(y_pred, (3, 1, 2, 0)) y_true_pos = K.batch_flatten(y_true) y_pred_pos = K.batch_flatten(y_pred) true_pos = K.sum(y_true_pos * y_pred_pos, 1) - false_neg = K.sum(y_true_pos * (1-y_pred_pos), 1) - false_pos = K.sum((1-y_true_pos)*y_pred_pos, 1) - alpha = 0.2#0.5 - beta=0.8 - return (true_pos + smooth)/(true_pos + alpha*false_neg + (beta)*false_pos + smooth) + false_neg = K.sum(y_true_pos * (1 - y_pred_pos), 1) + false_pos = K.sum((1 - y_true_pos) * y_pred_pos, 1) + alpha = 0.2 # 0.5 + beta = 0.8 + return (true_pos + smooth) / (true_pos + alpha * false_neg + (beta) * false_pos + smooth) -def focal_tversky_loss(y_true,y_pred): + +def focal_tversky_loss(y_true, y_pred): pt_1 = class_tversky(y_true, y_pred) - gamma =1.3#4./3.0#1.3#4.0/3.00# 0.75 - return K.sum(K.pow((1-pt_1), gamma)) + gamma = 1.3 # 4./3.0#1.3#4.0/3.00# 0.75 + return K.sum(K.pow((1 - pt_1), gamma)) + def generalized_dice_coeff2(y_true, y_pred): n_el = 1 - for dim in y_true.shape: + for dim in y_true.shape: n_el *= int(dim) n_cl = y_true.shape[-1] w = K.zeros(shape=(n_cl,)) - w = (K.sum(y_true, axis=(0,1,2)))/(n_el) - w = 1/(w**2+0.000001) - numerator = y_true*y_pred - numerator = w*K.sum(numerator,(0,1,2)) + w = (K.sum(y_true, axis=(0, 1, 2))) / (n_el) + w = 1 / (w ** 2 + 0.000001) + numerator = y_true * y_pred + numerator = w * K.sum(numerator, (0, 1, 2)) numerator = K.sum(numerator) - denominator = y_true+y_pred - denominator = w*K.sum(denominator,(0,1,2)) + denominator = y_true + y_pred + denominator = w * K.sum(denominator, (0, 1, 2)) denominator = K.sum(denominator) - return 2*numerator/denominator + return 2 * numerator / denominator + + def generalized_dice_coeff(y_true, y_pred): - axes = tuple(range(1, len(y_pred.shape)-1)) + axes = tuple(range(1, len(y_pred.shape) - 1)) Ncl = y_pred.shape[-1] w = K.zeros(shape=(Ncl,)) w = K.sum(y_true, axis=axes) - w = 1/(w**2+0.000001) + w = 1 / (w ** 2 + 0.000001) # Compute gen dice coef: - numerator = y_true*y_pred - numerator = w*K.sum(numerator,axes) + numerator = y_true * y_pred + numerator = w * K.sum(numerator, axes) numerator = K.sum(numerator) - denominator = y_true+y_pred - denominator = w*K.sum(denominator,axes) + denominator = y_true + y_pred + denominator = w * K.sum(denominator, axes) denominator = K.sum(denominator) - gen_dice_coef = 2*numerator/denominator + gen_dice_coef = 2 * numerator / denominator return gen_dice_coef + def generalized_dice_loss(y_true, y_pred): return 1 - generalized_dice_coeff2(y_true, y_pred) -def soft_dice_loss(y_true, y_pred, epsilon=1e-6): + + +def soft_dice_loss(y_true, y_pred, epsilon=1e-6): ''' Soft dice loss calculation for arbitrary batch size, 
number of classes, and number of spatial dimensions. Assumes the `channels_last` format. @@ -151,16 +165,18 @@ def soft_dice_loss(y_true, y_pred, epsilon=1e-6): Adapted from https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022 ''' - + # skip the batch and class axis for calculating Dice score - axes = tuple(range(1, len(y_pred.shape)-1)) - + axes = tuple(range(1, len(y_pred.shape) - 1)) + numerator = 2. * K.sum(y_pred * y_true, axes) denominator = K.sum(K.square(y_pred) + K.square(y_true), axes) - return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch + return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch + -def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last = True, mean_per_class=False, verbose=False): +def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last=True, mean_per_class=False, + verbose=False): """ Compute mean metrics of two segmentation masks, via Keras. @@ -193,13 +209,13 @@ def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last = H = height, N = number of classes """ - + flag_soft = (metric_type == 'soft') flag_naive_mean = (metric_type == 'naive') - + # always assume one or more classes num_classes = K.shape(y_true)[-1] - + if not flag_soft: # get one-hot encoded masks from y_pred (true masks should already be one-hot) y_pred = K.one_hot(K.argmax(y_pred), num_classes) @@ -211,29 +227,29 @@ def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last = y_pred = K.cast(y_pred, 'float32') # intersection and union shapes are batch_size * n_classes (values = area in pixels) - axes = (1,2) # W,H axes of each image + axes = (1, 2) # W,H axes of each image intersection = K.sum(K.abs(y_true * y_pred), axis=axes) mask_sum = K.sum(K.abs(y_true), axis=axes) + K.sum(K.abs(y_pred), axis=axes) - union = mask_sum - intersection # or, np.logical_or(y_pred, y_true) for one-hot + union = mask_sum - intersection # or, np.logical_or(y_pred, y_true) for one-hot smooth = .001 iou = (intersection + smooth) / (union + smooth) - dice = 2 * (intersection + smooth)/(mask_sum + smooth) + dice = 2 * (intersection + smooth) / (mask_sum + smooth) metric = {'iou': iou, 'dice': dice}[metric_name] # define mask to be 0 when no pixels are present in either y_true or y_pred, 1 otherwise - mask = K.cast(K.not_equal(union, 0), 'float32') - + mask = K.cast(K.not_equal(union, 0), 'float32') + if drop_last: - metric = metric[:,:-1] - mask = mask[:,:-1] - + metric = metric[:, :-1] + mask = mask[:, :-1] + if verbose: print('intersection, union') print(K.eval(intersection), K.eval(union)) - print(K.eval(intersection/union)) - + print(K.eval(intersection / union)) + # return mean metrics: remaining axes are (batch, classes) if flag_naive_mean: return K.mean(metric) @@ -243,13 +259,14 @@ def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last = non_zero = tf.greater(class_count, 0) non_zero_sum = tf.boolean_mask(K.sum(metric * mask, axis=0), non_zero) non_zero_count = tf.boolean_mask(class_count, non_zero) - + if verbose: print('Counts of inputs with class present, metrics for non-absent classes') print(K.eval(class_count), K.eval(non_zero_sum / non_zero_count)) - + return K.mean(non_zero_sum / non_zero_count) + def mean_iou(y_true, y_pred, **kwargs): """ Compute mean Intersection over Union of two segmentation masks, via Keras. 
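For context, a minimal sketch of how the losses and metrics in this module are typically wired into a Keras model. The flat imports follow the layout used by the training scripts in this patch series; the optimizer, learning rate, input shape, and class count below are illustrative assumptions, not values taken from the repository:

    from keras.optimizers import Adam
    from models import resnet50_unet
    from metrics import soft_dice_loss, mean_iou

    # assumed 2-class segmentation at 448x448 input (both divisible by 32,
    # as resnet50_unet asserts); the learning rate is likewise an assumption
    model = resnet50_unet(n_classes=2, input_height=448, input_width=448)
    model.compile(optimizer=Adam(lr=1e-4), loss=soft_dice_loss, metrics=[mean_iou])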
@@ -257,65 +274,69 @@ def mean_iou(y_true, y_pred, **kwargs): Calls metrics_k(y_true, y_pred, metric_name='iou'), see there for allowed kwargs. """ return seg_metrics(y_true, y_pred, metric_name='iou', **kwargs) + + def Mean_IOU(y_true, y_pred): nb_classes = K.int_shape(y_pred)[-1] iou = [] true_pixels = K.argmax(y_true, axis=-1) pred_pixels = K.argmax(y_pred, axis=-1) void_labels = K.equal(K.sum(y_true, axis=-1), 0) - for i in range(0, nb_classes): # exclude first label (background) and last label (void) - true_labels = K.equal(true_pixels, i)# & ~void_labels - pred_labels = K.equal(pred_pixels, i)# & ~void_labels + for i in range(0, nb_classes): # exclude first label (background) and last label (void) + true_labels = K.equal(true_pixels, i) # & ~void_labels + pred_labels = K.equal(pred_pixels, i) # & ~void_labels inter = tf.to_int32(true_labels & pred_labels) union = tf.to_int32(true_labels | pred_labels) - legal_batches = K.sum(tf.to_int32(true_labels), axis=1)>0 - ious = K.sum(inter, axis=1)/K.sum(union, axis=1) - iou.append(K.mean(tf.gather(ious, indices=tf.where(legal_batches)))) # returns average IoU of the same objects + legal_batches = K.sum(tf.to_int32(true_labels), axis=1) > 0 + ious = K.sum(inter, axis=1) / K.sum(union, axis=1) + iou.append(K.mean(tf.gather(ious, indices=tf.where(legal_batches)))) # returns average IoU of the same objects iou = tf.stack(iou) legal_labels = ~tf.debugging.is_nan(iou) iou = tf.gather(iou, indices=tf.where(legal_labels)) return K.mean(iou) + def iou_vahid(y_true, y_pred): - nb_classes = tf.shape(y_true)[-1]+tf.to_int32(1) + nb_classes = tf.shape(y_true)[-1] + tf.to_int32(1) true_pixels = K.argmax(y_true, axis=-1) pred_pixels = K.argmax(y_pred, axis=-1) iou = [] - + for i in tf.range(nb_classes): - tp=K.sum( tf.to_int32( K.equal(true_pixels, i) & K.equal(pred_pixels, i) ) ) - fp=K.sum( tf.to_int32( K.not_equal(true_pixels, i) & K.equal(pred_pixels, i) ) ) - fn=K.sum( tf.to_int32( K.equal(true_pixels, i) & K.not_equal(pred_pixels, i) ) ) - iouh=tp/(tp+fp+fn) + tp = K.sum(tf.to_int32(K.equal(true_pixels, i) & K.equal(pred_pixels, i))) + fp = K.sum(tf.to_int32(K.not_equal(true_pixels, i) & K.equal(pred_pixels, i))) + fn = K.sum(tf.to_int32(K.equal(true_pixels, i) & K.not_equal(pred_pixels, i))) + iouh = tp / (tp + fp + fn) iou.append(iouh) return K.mean(iou) - - -def IoU_metric(Yi,y_predi): - ## mean Intersection over Union - ## Mean IoU = TP/(FN + TP + FP) + + +def IoU_metric(Yi, y_predi): + # mean Intersection over Union + # Mean IoU = TP/(FN + TP + FP) y_predi = np.argmax(y_predi, axis=3) y_testi = np.argmax(Yi, axis=3) IoUs = [] Nclass = int(np.max(Yi)) + 1 for c in range(Nclass): - TP = np.sum( (Yi == c)&(y_predi==c) ) - FP = np.sum( (Yi != c)&(y_predi==c) ) - FN = np.sum( (Yi == c)&(y_predi != c)) - IoU = TP/float(TP + FP + FN) + TP = np.sum((Yi == c) & (y_predi == c)) + FP = np.sum((Yi != c) & (y_predi == c)) + FN = np.sum((Yi == c) & (y_predi != c)) + IoU = TP / float(TP + FP + FN) IoUs.append(IoU) - return K.cast( np.mean(IoUs) ,dtype='float32' ) + return K.cast(np.mean(IoUs), dtype='float32') def IoU_metric_keras(y_true, y_pred): - ## mean Intersection over Union - ## Mean IoU = TP/(FN + TP + FP) + # mean Intersection over Union + # Mean IoU = TP/(FN + TP + FP) init = tf.global_variables_initializer() sess = tf.Session() sess.run(init) - + return IoU_metric(y_true.eval(session=sess), y_pred.eval(session=sess)) + def jaccard_distance_loss(y_true, y_pred, smooth=100): """ Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|) @@ -334,5 +355,3 @@ def 
jaccard_distance_loss(y_true, y_pred, smooth=100): sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1) jac = (intersection + smooth) / (sum_ - intersection + smooth) return (1 - jac) * smooth - - diff --git a/eynollah/eynollah/train/models.py b/eynollah/eynollah/train/models.py index 7c806b4..7a1e246 100644 --- a/eynollah/eynollah/train/models.py +++ b/eynollah/eynollah/train/models.py @@ -3,19 +3,20 @@ from keras.layers import * from keras import layers from keras.regularizers import l2 -resnet50_Weights_path='./pretrained_model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' -IMAGE_ORDERING ='channels_last' -MERGE_AXIS=-1 +resnet50_Weights_path = './pretrained_model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' +IMAGE_ORDERING = 'channels_last' +MERGE_AXIS = -1 -def one_side_pad( x ): +def one_side_pad(x): x = ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING)(x) if IMAGE_ORDERING == 'channels_first': - x = Lambda(lambda x : x[: , : , :-1 , :-1 ] )(x) + x = Lambda(lambda x: x[:, :, :-1, :-1])(x) elif IMAGE_ORDERING == 'channels_last': - x = Lambda(lambda x : x[: , :-1 , :-1 , : ] )(x) + x = Lambda(lambda x: x[:, :-1, :-1, :])(x) return x + def identity_block(input_tensor, kernel_size, filters, stage, block): """The identity block is the block that has no conv layer at shortcut. # Arguments @@ -28,7 +29,7 @@ def identity_block(input_tensor, kernel_size, filters, stage, block): Output tensor for the block. """ filters1, filters2, filters3 = filters - + if IMAGE_ORDERING == 'channels_last': bn_axis = 3 else: @@ -37,16 +38,16 @@ def identity_block(input_tensor, kernel_size, filters, stage, block): conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' - x = Conv2D(filters1, (1, 1) , data_format=IMAGE_ORDERING , name=conv_name_base + '2a')(input_tensor) + x = Conv2D(filters1, (1, 1), data_format=IMAGE_ORDERING, name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) - x = Conv2D(filters2, kernel_size , data_format=IMAGE_ORDERING , + x = Conv2D(filters2, kernel_size, data_format=IMAGE_ORDERING, padding='same', name=conv_name_base + '2b')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) - x = Conv2D(filters3 , (1, 1), data_format=IMAGE_ORDERING , name=conv_name_base + '2c')(x) + x = Conv2D(filters3, (1, 1), data_format=IMAGE_ORDERING, name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) x = layers.add([x, input_tensor]) @@ -68,7 +69,7 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)) And the shortcut should have strides=(2,2) as well """ filters1, filters2, filters3 = filters - + if IMAGE_ORDERING == 'channels_last': bn_axis = 3 else: @@ -77,20 +78,20 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)) conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' - x = Conv2D(filters1, (1, 1) , data_format=IMAGE_ORDERING , strides=strides, + x = Conv2D(filters1, (1, 1), data_format=IMAGE_ORDERING, strides=strides, name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) - x = Conv2D(filters2, kernel_size , data_format=IMAGE_ORDERING , padding='same', + x = Conv2D(filters2, kernel_size, data_format=IMAGE_ORDERING, padding='same', name=conv_name_base + '2b')(x) x = 
BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) - x = Conv2D(filters3, (1, 1) , data_format=IMAGE_ORDERING , name=conv_name_base + '2c')(x) + x = Conv2D(filters3, (1, 1), data_format=IMAGE_ORDERING, name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - shortcut = Conv2D(filters3, (1, 1) , data_format=IMAGE_ORDERING , strides=strides, + shortcut = Conv2D(filters3, (1, 1), data_format=IMAGE_ORDERING, strides=strides, name=conv_name_base + '1')(input_tensor) shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) @@ -99,12 +100,11 @@ def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)) return x -def resnet50_unet_light(n_classes,input_height=224,input_width=224,weight_decay=1e-6,pretraining=False): - assert input_height%32 == 0 - assert input_width%32 == 0 +def resnet50_unet_light(n_classes, input_height=224, input_width=224, weight_decay=1e-6, pretraining=False): + assert input_height % 32 == 0 + assert input_width % 32 == 0 - - img_input = Input(shape=(input_height,input_width , 3 )) + img_input = Input(shape=(input_height, input_width, 3)) if IMAGE_ORDERING == 'channels_last': bn_axis = 3 @@ -112,25 +112,24 @@ def resnet50_unet_light(n_classes,input_height=224,input_width=224,weight_decay= bn_axis = 1 x = ZeroPadding2D((3, 3), data_format=IMAGE_ORDERING)(img_input) - x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, strides=(2, 2),kernel_regularizer=l2(weight_decay), name='conv1')(x) + x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, strides=(2, 2), kernel_regularizer=l2(weight_decay), + name='conv1')(x) f1 = x x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) - x = MaxPooling2D((3, 3) , data_format=IMAGE_ORDERING , strides=(2, 2))(x) - + x = MaxPooling2D((3, 3), data_format=IMAGE_ORDERING, strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - f2 = one_side_pad(x ) - + f2 = one_side_pad(x) x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') - f3 = x + f3 = x x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') @@ -138,85 +137,72 @@ def resnet50_unet_light(n_classes,input_height=224,input_width=224,weight_decay= x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') - f4 = x + f4 = x x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - f5 = x - + f5 = x if pretraining: - model=Model( img_input , x ).load_weights(resnet50_Weights_path) + model = Model(img_input, x).load_weights(resnet50_Weights_path) - - v512_2048 = Conv2D( 512 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f5 ) - v512_2048 = ( BatchNormalization(axis=bn_axis))(v512_2048) + v512_2048 = Conv2D(512, (1, 1), padding='same', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay))(f5) + v512_2048 = 
(BatchNormalization(axis=bn_axis))(v512_2048) v512_2048 = Activation('relu')(v512_2048) - - - v512_1024=Conv2D( 512 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f4 ) - v512_1024 = ( BatchNormalization(axis=bn_axis))(v512_1024) + v512_1024 = Conv2D(512, (1, 1), padding='same', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay))(f4) + v512_1024 = (BatchNormalization(axis=bn_axis))(v512_1024) v512_1024 = Activation('relu')(v512_1024) - - - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(v512_2048) - o = ( concatenate([ o ,v512_1024],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) - o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) - o = ( BatchNormalization(axis=bn_axis))(o) - o = Activation('relu')(o) - - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) - o = ( concatenate([ o ,f3],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) - o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) - o = ( BatchNormalization(axis=bn_axis))(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(v512_2048) + o = (concatenate([o, v512_1024], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = Activation('relu')(o) - - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) - o = ( concatenate([o,f2],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING))(o) - o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay) ) )(o) - o = ( BatchNormalization(axis=bn_axis))(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) + o = (concatenate([o, f3], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(256, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = Activation('relu')(o) - - - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) - o = ( concatenate([o,f1],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) - o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) - o = ( BatchNormalization(axis=bn_axis))(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) + o = (concatenate([o, f2], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(128, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = Activation('relu')(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) + o = (concatenate([o, f1], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(64, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) - o = ( concatenate([o,img_input],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) - o = ( Conv2D( 32 , (3, 3), padding='valid' , 
data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) - o = ( BatchNormalization(axis=bn_axis))(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) + o = (concatenate([o, img_input], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(32, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = Activation('relu')(o) - - - o = Conv2D( n_classes , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( o ) - o = ( BatchNormalization(axis=bn_axis))(o) + o = Conv2D(n_classes, (1, 1), padding='same', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = (Activation('softmax'))(o) - - model = Model( img_input , o ) + model = Model(img_input, o) return model -def resnet50_unet(n_classes,input_height=224,input_width=224,weight_decay=1e-6,pretraining=False): - assert input_height%32 == 0 - assert input_width%32 == 0 - - img_input = Input(shape=(input_height,input_width , 3 )) +def resnet50_unet(n_classes, input_height=224, input_width=224, weight_decay=1e-6, pretraining=False): + assert input_height % 32 == 0 + assert input_width % 32 == 0 + + img_input = Input(shape=(input_height, input_width, 3)) if IMAGE_ORDERING == 'channels_last': bn_axis = 3 @@ -224,25 +210,24 @@ def resnet50_unet(n_classes,input_height=224,input_width=224,weight_decay=1e-6,p bn_axis = 1 x = ZeroPadding2D((3, 3), data_format=IMAGE_ORDERING)(img_input) - x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, strides=(2, 2),kernel_regularizer=l2(weight_decay), name='conv1')(x) + x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, strides=(2, 2), kernel_regularizer=l2(weight_decay), + name='conv1')(x) f1 = x x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) - x = MaxPooling2D((3, 3) , data_format=IMAGE_ORDERING , strides=(2, 2))(x) - + x = MaxPooling2D((3, 3), data_format=IMAGE_ORDERING, strides=(2, 2))(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - f2 = one_side_pad(x ) - + f2 = one_side_pad(x) x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') - f3 = x + f3 = x x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') @@ -250,68 +235,60 @@ def resnet50_unet(n_classes,input_height=224,input_width=224,weight_decay=1e-6,p x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') - f4 = x + f4 = x x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - f5 = x + f5 = x if pretraining: - Model( img_input , x ).load_weights(resnet50_Weights_path) + Model(img_input, x).load_weights(resnet50_Weights_path) - v1024_2048 = Conv2D( 1024 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f5 ) - v1024_2048 = ( BatchNormalization(axis=bn_axis))(v1024_2048) + v1024_2048 = 
Conv2D(1024, (1, 1), padding='same', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay))( + f5) + v1024_2048 = (BatchNormalization(axis=bn_axis))(v1024_2048) v1024_2048 = Activation('relu')(v1024_2048) - - - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(v1024_2048) - o = ( concatenate([ o ,f4],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) - o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) - o = ( BatchNormalization(axis=bn_axis))(o) - o = Activation('relu')(o) - - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) - o = ( concatenate([ o ,f3],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) - o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) - o = ( BatchNormalization(axis=bn_axis))(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(v1024_2048) + o = (concatenate([o, f4], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = Activation('relu')(o) - - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) - o = ( concatenate([o,f2],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING))(o) - o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay) ) )(o) - o = ( BatchNormalization(axis=bn_axis))(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) + o = (concatenate([o, f3], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(256, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = Activation('relu')(o) - - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) - o = ( concatenate([o,f1],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) - o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) - o = ( BatchNormalization(axis=bn_axis))(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) + o = (concatenate([o, f2], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(128, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = Activation('relu')(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) + o = (concatenate([o, f1], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(64, (3, 3), padding='valid', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) - o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) - o = ( concatenate([o,img_input],axis=MERGE_AXIS ) ) - o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) - o = ( Conv2D( 32 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) - o = ( BatchNormalization(axis=bn_axis))(o) + o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) + o = (concatenate([o, img_input], axis=MERGE_AXIS)) + o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) + o = (Conv2D(32, (3, 3), padding='valid', 
data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay)))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = Activation('relu')(o) - - - o = Conv2D( n_classes , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( o ) - o = ( BatchNormalization(axis=bn_axis))(o) + + o = Conv2D(n_classes, (1, 1), padding='same', data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay))(o) + o = (BatchNormalization(axis=bn_axis))(o) o = (Activation('softmax'))(o) - - model = Model( img_input , o ) - - + model = Model(img_input, o) return model diff --git a/eynollah/eynollah/train/pagexml2img.py b/eynollah/eynollah/train/pagexml2img.py index c489315..8570f4f 100644 --- a/eynollah/eynollah/train/pagexml2img.py +++ b/eynollah/eynollah/train/pagexml2img.py @@ -1,6 +1,6 @@ #! /usr/bin/env python3 -__version__= '1.0' +__version__ = '1.0' import argparse import sys @@ -14,235 +14,260 @@ import cv2 with warnings.catch_warnings(): warnings.simplefilter("ignore") -__doc__=\ -""" +__doc__ = \ + """ tool to extract 2d or 3d RGB images from page xml data. In former case output will be 1 2D image array which each class has filled with a pixel value. In the case of 3D RGB image each class will be defined with a RGB value and beside images a text file of classes also will be produced. This classes.txt file is required for dhsegment tool. """ + class pagexml2img: - def __init__(self,dir_in, out_dir,output_type): - self.dir=dir_in - self.output_dir=out_dir - self.output_type=output_type + def __init__(self, dir_in, out_dir, output_type): + self.dir = dir_in + self.output_dir = out_dir + self.output_type = output_type def get_content_of_dir(self): """ Listing all ground truth page xml files. All files are needed to have xml format. """ - - gt_all=os.listdir(self.dir) - self.gt_list=[file for file in gt_all if file.split('.')[ len(file.split('.'))-1 ]=='xml' ] + gt_all = os.listdir(self.dir) + self.gt_list = [file for file in gt_all if file.split('.')[len(file.split('.')) - 1] == 'xml'] def get_images_of_ground_truth(self): """ Reading the page xml files and write the ground truth images into given output directory. 
""" - if self.output_type=='3d' or self.output_type=='3D': - classes=np.array([ [0,0,0, 1, 0, 0, 0, 0], - [255,0,0, 0, 1, 0, 0, 0], - [0,255,0, 0, 0, 1, 0, 0], - [0,0,255, 0, 0, 0, 1, 0], - [0,255,255, 0, 0, 0, 0, 1] ]) - - - + if self.output_type == '3d' or self.output_type == '3D': + classes = np.array([[0, 0, 0, 1, 0, 0, 0, 0], + [255, 0, 0, 0, 1, 0, 0, 0], + [0, 255, 0, 0, 0, 1, 0, 0], + [0, 0, 255, 0, 0, 0, 1, 0], + [0, 255, 255, 0, 0, 0, 0, 1]]) for index in tqdm(range(len(self.gt_list))): try: - tree1 = ET.parse(self.dir+'/'+self.gt_list[index]) - root1=tree1.getroot() - alltags=[elem.tag for elem in root1.iter()] - link=alltags[0].split('}')[0]+'}' - - region_tags=np.unique([x for x in alltags if x.endswith('Region')]) - - for jj in root1.iter(link+'Page'): - y_len=int(jj.attrib['imageHeight']) - x_len=int(jj.attrib['imageWidth']) - - co_text=[] - co_sep=[] - co_img=[] - co_table=[] + tree1 = ET.parse(self.dir + '/' + self.gt_list[index]) + root1 = tree1.getroot() + alltags = [elem.tag for elem in root1.iter()] + link = alltags[0].split('}')[0] + '}' + + region_tags = np.unique([x for x in alltags if x.endswith('Region')]) + + for jj in root1.iter(link + 'Page'): + y_len = int(jj.attrib['imageHeight']) + x_len = int(jj.attrib['imageWidth']) + + co_text = [] + co_sep = [] + co_img = [] + co_table = [] for tag in region_tags: - if tag.endswith('}TextRegion') or tag.endswith('}Textregion') or tag.endswith('}textRegion') or tag.endswith('}textregion'): - + if tag.endswith('}TextRegion') or tag.endswith('}Textregion') or tag.endswith( + '}textRegion') or tag.endswith('}textregion'): + for nn in root1.iter(tag): - for co_it in nn.iter(link+'Coords'): - if bool(co_it.attrib)==False: - c_t_in=[] - for ll in nn.iter(link+'Point'): - c_t_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + for co_it in nn.iter(link + 'Coords'): + if bool(co_it.attrib) == False: + c_t_in = [] + for ll in nn.iter(link + 'Point'): + c_t_in.append( + [int(np.float(ll.attrib['x'])), int(np.float(ll.attrib['y']))]) co_text.append(np.array(c_t_in)) print(co_text) - elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): - p_h=co_it.attrib['points'].split(' ') - co_text.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + elif bool(co_it.attrib) == True and 'points' in co_it.attrib.keys(): + p_h = co_it.attrib['points'].split(' ') + co_text.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) - - elif tag.endswith('}ImageRegion') or tag.endswith('}Imageregion') or tag.endswith('}imageRegion') or tag.endswith('}imageregion'): + elif tag.endswith('}ImageRegion') or tag.endswith('}Imageregion') or tag.endswith( + '}imageRegion') or tag.endswith('}imageregion'): for nn in root1.iter(tag): - for co_it in nn.iter(link+'Coords'): - if bool(co_it.attrib)==False: - c_i_in=[] - for ll in nn.iter(link+'Point'): - c_i_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + for co_it in nn.iter(link + 'Coords'): + if bool(co_it.attrib) == False: + c_i_in = [] + for ll in nn.iter(link + 'Point'): + c_i_in.append( + [int(np.float(ll.attrib['x'])), int(np.float(ll.attrib['y']))]) co_img.append(np.array(c_i_in)) - elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): - p_h=co_it.attrib['points'].split(' ') - co_img.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - elif tag.endswith('}SeparatorRegion') or tag.endswith('}Separatorregion') or 
tag.endswith('}separatorRegion') or tag.endswith('}separatorregion'): + elif bool(co_it.attrib) == True and 'points' in co_it.attrib.keys(): + p_h = co_it.attrib['points'].split(' ') + co_img.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) + + elif tag.endswith('}SeparatorRegion') or tag.endswith('}Separatorregion') or tag.endswith( + '}separatorRegion') or tag.endswith('}separatorregion'): for nn in root1.iter(tag): - for co_it in nn.iter(link+'Coords'): - if bool(co_it.attrib)==False: - c_s_in=[] - for ll in nn.iter(link+'Point'): - c_s_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + for co_it in nn.iter(link + 'Coords'): + if bool(co_it.attrib) == False: + c_s_in = [] + for ll in nn.iter(link + 'Point'): + c_s_in.append( + [int(np.float(ll.attrib['x'])), int(np.float(ll.attrib['y']))]) co_sep.append(np.array(c_s_in)) - - elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): - p_h=co_it.attrib['points'].split(' ') - co_sep.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - elif tag.endswith('}TableRegion') or tag.endswith('}tableRegion') or tag.endswith('}Tableregion') or tag.endswith('}tableregion'): + + elif bool(co_it.attrib) == True and 'points' in co_it.attrib.keys(): + p_h = co_it.attrib['points'].split(' ') + co_sep.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) + + elif tag.endswith('}TableRegion') or tag.endswith('}tableRegion') or tag.endswith( + '}Tableregion') or tag.endswith('}tableregion'): for nn in root1.iter(tag): - for co_it in nn.iter(link+'Coords'): - if bool(co_it.attrib)==False: - c_ta_in=[] - for ll in nn.iter(link+'Point'): - c_ta_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + for co_it in nn.iter(link + 'Coords'): + if bool(co_it.attrib) == False: + c_ta_in = [] + for ll in nn.iter(link + 'Point'): + c_ta_in.append( + [int(np.float(ll.attrib['x'])), int(np.float(ll.attrib['y']))]) co_table.append(np.array(c_ta_in)) - - elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): - p_h=co_it.attrib['points'].split(' ') - co_table.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif bool(co_it.attrib) == True and 'points' in co_it.attrib.keys(): + p_h = co_it.attrib['points'].split(' ') + co_table.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) else: pass - - img = np.zeros( (y_len,x_len,3) ) - img_poly=cv2.fillPoly(img, pts =co_text, color=(255,0,0)) - img_poly=cv2.fillPoly(img, pts =co_img, color=(0,255,0)) - img_poly=cv2.fillPoly(img, pts =co_sep, color=(0,0,255)) - img_poly=cv2.fillPoly(img, pts =co_table, color=(0,255,255)) - - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) + + img = np.zeros((y_len, x_len, 3)) + img_poly = cv2.fillPoly(img, pts=co_text, color=(255, 0, 0)) + img_poly = cv2.fillPoly(img, pts=co_img, color=(0, 255, 0)) + img_poly = cv2.fillPoly(img, pts=co_sep, color=(0, 0, 255)) + img_poly = cv2.fillPoly(img, pts=co_table, color=(0, 255, 255)) + + try: + cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('-')[1].split('.')[0] + '.png', + img_poly) except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) + cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('.')[0] + '.png', img_poly) except: pass - np.savetxt(self.output_dir+'/../classes.txt',classes) - - if 
self.output_type=='2d' or self.output_type=='2D': + np.savetxt(self.output_dir + '/../classes.txt', classes) + + if self.output_type == '2d' or self.output_type == '2D': for index in tqdm(range(len(self.gt_list))): try: - tree1 = ET.parse(self.dir+'/'+self.gt_list[index]) - root1=tree1.getroot() - alltags=[elem.tag for elem in root1.iter()] - link=alltags[0].split('}')[0]+'}' - - region_tags=np.unique([x for x in alltags if x.endswith('Region')]) - - for jj in root1.iter(link+'Page'): - y_len=int(jj.attrib['imageHeight']) - x_len=int(jj.attrib['imageWidth']) - - co_text=[] - co_sep=[] - co_img=[] - co_table=[] + tree1 = ET.parse(self.dir + '/' + self.gt_list[index]) + root1 = tree1.getroot() + alltags = [elem.tag for elem in root1.iter()] + link = alltags[0].split('}')[0] + '}' + + region_tags = np.unique([x for x in alltags if x.endswith('Region')]) + + for jj in root1.iter(link + 'Page'): + y_len = int(jj.attrib['imageHeight']) + x_len = int(jj.attrib['imageWidth']) + + co_text = [] + co_sep = [] + co_img = [] + co_table = [] for tag in region_tags: - if tag.endswith('}TextRegion') or tag.endswith('}Textregion') or tag.endswith('}textRegion') or tag.endswith('}textregion'): - + if tag.endswith('}TextRegion') or tag.endswith('}Textregion') or tag.endswith( + '}textRegion') or tag.endswith('}textregion'): + for nn in root1.iter(tag): - for co_it in nn.iter(link+'Coords'): - if bool(co_it.attrib)==False: - c_t_in=[] - for ll in nn.iter(link+'Point'): - c_t_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + for co_it in nn.iter(link + 'Coords'): + if bool(co_it.attrib) == False: + c_t_in = [] + for ll in nn.iter(link + 'Point'): + c_t_in.append( + [int(np.float(ll.attrib['x'])), int(np.float(ll.attrib['y']))]) co_text.append(np.array(c_t_in)) print(co_text) - elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): - p_h=co_it.attrib['points'].split(' ') - co_text.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + elif bool(co_it.attrib) == True and 'points' in co_it.attrib.keys(): + p_h = co_it.attrib['points'].split(' ') + co_text.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) - - elif tag.endswith('}ImageRegion') or tag.endswith('}Imageregion') or tag.endswith('}imageRegion') or tag.endswith('}imageregion'): + elif tag.endswith('}ImageRegion') or tag.endswith('}Imageregion') or tag.endswith( + '}imageRegion') or tag.endswith('}imageregion'): for nn in root1.iter(tag): - for co_it in nn.iter(link+'Coords'): - if bool(co_it.attrib)==False: - c_i_in=[] - for ll in nn.iter(link+'Point'): - c_i_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + for co_it in nn.iter(link + 'Coords'): + if bool(co_it.attrib) == False: + c_i_in = [] + for ll in nn.iter(link + 'Point'): + c_i_in.append( + [int(np.float(ll.attrib['x'])), int(np.float(ll.attrib['y']))]) co_img.append(np.array(c_i_in)) - elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): - p_h=co_it.attrib['points'].split(' ') - co_img.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - elif tag.endswith('}SeparatorRegion') or tag.endswith('}Separatorregion') or tag.endswith('}separatorRegion') or tag.endswith('}separatorregion'): + elif bool(co_it.attrib) == True and 'points' in co_it.attrib.keys(): + p_h = co_it.attrib['points'].split(' ') + co_img.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) + + elif 
tag.endswith('}SeparatorRegion') or tag.endswith('}Separatorregion') or tag.endswith( + '}separatorRegion') or tag.endswith('}separatorregion'): for nn in root1.iter(tag): - for co_it in nn.iter(link+'Coords'): - if bool(co_it.attrib)==False: - c_s_in=[] - for ll in nn.iter(link+'Point'): - c_s_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + for co_it in nn.iter(link + 'Coords'): + if bool(co_it.attrib) == False: + c_s_in = [] + for ll in nn.iter(link + 'Point'): + c_s_in.append( + [int(np.float(ll.attrib['x'])), int(np.float(ll.attrib['y']))]) co_sep.append(np.array(c_s_in)) - - elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): - p_h=co_it.attrib['points'].split(' ') - co_sep.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) - - elif tag.endswith('}TableRegion') or tag.endswith('}tableRegion') or tag.endswith('}Tableregion') or tag.endswith('}tableregion'): + + elif bool(co_it.attrib) == True and 'points' in co_it.attrib.keys(): + p_h = co_it.attrib['points'].split(' ') + co_sep.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) + + elif tag.endswith('}TableRegion') or tag.endswith('}tableRegion') or tag.endswith( + '}Tableregion') or tag.endswith('}tableregion'): for nn in root1.iter(tag): - for co_it in nn.iter(link+'Coords'): - if bool(co_it.attrib)==False: - c_ta_in=[] - for ll in nn.iter(link+'Point'): - c_ta_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) + for co_it in nn.iter(link + 'Coords'): + if bool(co_it.attrib) == False: + c_ta_in = [] + for ll in nn.iter(link + 'Point'): + c_ta_in.append( + [int(np.float(ll.attrib['x'])), int(np.float(ll.attrib['y']))]) co_table.append(np.array(c_ta_in)) - - elif bool(co_it.attrib)==True and 'points' in co_it.attrib.keys(): - p_h=co_it.attrib['points'].split(' ') - co_table.append( np.array( [ [ int(x.split(',')[0]) , int(x.split(',')[1]) ] for x in p_h] ) ) + + elif bool(co_it.attrib) == True and 'points' in co_it.attrib.keys(): + p_h = co_it.attrib['points'].split(' ') + co_table.append( + np.array([[int(x.split(',')[0]), int(x.split(',')[1])] for x in p_h])) else: pass - - img = np.zeros( (y_len,x_len) ) - img_poly=cv2.fillPoly(img, pts =co_text, color=(1,1,1)) - img_poly=cv2.fillPoly(img, pts =co_img, color=(2,2,2)) - img_poly=cv2.fillPoly(img, pts =co_sep, color=(3,3,3)) - img_poly=cv2.fillPoly(img, pts =co_table, color=(4,4,4)) - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) + + img = np.zeros((y_len, x_len)) + img_poly = cv2.fillPoly(img, pts=co_text, color=(1, 1, 1)) + img_poly = cv2.fillPoly(img, pts=co_img, color=(2, 2, 2)) + img_poly = cv2.fillPoly(img, pts=co_sep, color=(3, 3, 3)) + img_poly = cv2.fillPoly(img, pts=co_table, color=(4, 4, 4)) + try: + cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('-')[1].split('.')[0] + '.png', + img_poly) except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) + cv2.imwrite(self.output_dir + '/' + self.gt_list[index].split('.')[0] + '.png', img_poly) except: pass + def run(self): self.get_content_of_dir() self.get_images_of_ground_truth() + + def main(): - parser=argparse.ArgumentParser() - - parser.add_argument('-dir_in','--dir_in', dest='inp1', default=None, help='directory of page-xml files') - parser.add_argument('-dir_out','--dir_out', dest='inp2', default=None, help='directory where ground truth images would be written') - 
parser.add_argument('-type','--type', dest='inp3', default=None, help='this defines how output should be. A 2d image array or a 3d image array encoded with RGB color. Just pass 2d or 3d. The file will be saved one directory up. 2D image array is 3d but only information of one channel would be enough since all channels have the same values.')
-    options=parser.parse_args()
-    
-    possibles=globals()
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-dir_in', '--dir_in', dest='inp1', default=None, help='directory of page-xml files')
+    parser.add_argument('-dir_out', '--dir_out', dest='inp2', default=None,
+                        help='directory where ground truth images will be written')
+    parser.add_argument('-type', '--type', dest='inp3', default=None,
+                        help='output format: pass 2d or 3d. 2d writes a label image whose pixel values encode the classes (all three channels carry the same value, so reading one channel is enough); 3d writes an RGB-encoded label image and additionally saves a classes.txt file one directory above the output directory.')
+    options = parser.parse_args()
+
+    possibles = globals()
     possibles.update(locals())
-    x=pagexml2img(options.inp1,options.inp2,options.inp3)
+    x = pagexml2img(options.inp1, options.inp2, options.inp3)
     x.run()
-if __name__=="__main__":
-    main()
-    
-    
+
+if __name__ == "__main__":
+    main()
diff --git a/eynollah/eynollah/train/train.py b/eynollah/eynollah/train/train.py
index 0cc5ef3..0881182 100644
--- a/eynollah/eynollah/train/train.py
+++ b/eynollah/eynollah/train/train.py
@@ -2,7 +2,7 @@ import os
 import sys
 import tensorflow as tf
 from keras.backend.tensorflow_backend import set_session
-import keras , warnings
+import keras, warnings
 from keras.optimizers import *
 from sacred import Experiment
 from models import *
@@ -11,20 +11,21 @@ from metrics import *
 from keras.models import load_model
 from tqdm import tqdm
 
+
 def configuration():
     keras.backend.clear_session()
     tf.reset_default_graph()
     warnings.filterwarnings('ignore')
-    
-    os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
+
+    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
     config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
-    
-    
+
     config.gpu_options.allow_growth = True
-    config.gpu_options.per_process_gpu_memory_fraction=0.95#0.95
-    config.gpu_options.visible_device_list="0"
+    config.gpu_options.per_process_gpu_memory_fraction = 0.95  # 0.95
+    config.gpu_options.visible_device_list = "0"
     set_session(tf.Session(config=config))
 
+
 def get_dirs_or_files(input_data):
     if os.path.isdir(input_data):
         image_input, labels_input = os.path.join(input_data, 'images/'), os.path.join(input_data, 'labels/')
@@ -33,206 +34,188 @@ def get_dirs_or_files(input_data):
     assert os.path.isdir(labels_input), "{} is not a directory".format(labels_input)
     return image_input, labels_input
 
+
 ex = Experiment()
 
+
 @ex.config
 def config_params():
-    n_classes=None # Number of classes. If your case study is binary case the set it to 2 and otherwise give your number of cases.
-    n_epochs=1
-    input_height=224*1
-    input_width=224*1
-    weight_decay=1e-6 # Weight decay of l2 regularization of model layers.
-    n_batch=1 # Number of batches at each iteration.
-    learning_rate=1e-4
-    patches=False # Make patches of image in order to use all information of image. In the case of page
+    n_classes = None  # Number of classes. For a binary case set it to 2; otherwise give the number of classes.
+    n_epochs = 1
+    input_height = 224 * 1
+    input_width = 224 * 1
+    weight_decay = 1e-6  # Weight decay of l2 regularization of model layers.
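
Note: @ex.config is Sacred's configuration mechanism: every local variable assigned
inside config_params becomes an experiment parameter that Sacred injects by name into
the @ex.automain function below, and each one can be overridden from the command line.
A minimal sketch of the same pattern (hypothetical stand-alone script, not part of
this patch):

    from sacred import Experiment

    ex = Experiment()

    @ex.config
    def cfg():
        n_epochs = 1         # captured by name as a config entry
        learning_rate = 1e-4

    @ex.automain
    def run(n_epochs, learning_rate):
        # Sacred fills these arguments from the config; override with e.g.
        #   python script.py with n_epochs=5 learning_rate=1e-3
        print(n_epochs, learning_rate)
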
+    n_batch = 1  # Batch size: the number of samples per training step.
+    learning_rate = 1e-4
+    patches = False  # Make patches of image in order to use all information of image. In the case of page
     # extraction this should be set to false since model should see all image.
-    augmentation=False
-    flip_aug=False # Flip image (augmentation). 
-    blur_aug=False # Blur patches of image (augmentation). 
-    scaling=False # Scaling of patches (augmentation) will be imposed if this set to true. 
-    binarization=False # Otsu thresholding. Used for augmentation in the case of binary case like textline prediction. For multicases should not be applied.
-    dir_train=None # Directory of training dataset (sub-folders should be named images and labels).
-    dir_eval=None # Directory of validation dataset (sub-folders should be named images and labels).
-    dir_output=None # Directory of output where the model should be saved.
-    pretraining=False # Set true to load pretrained weights of resnet50 encoder.
-    scaling_bluring=False
-    scaling_binarization=False
-    scaling_flip=False
-    thetha=[10,-10]
-    blur_k=['blur','guass','median'] # Used in order to blur image. Used for augmentation.
-    scales= [ 0.5, 2 ] # Scale patches with these scales. Used for augmentation.
-    flip_index=[0,1,-1] # Flip image. Used for augmentation.
-    continue_training = False # If
+    augmentation = False
+    flip_aug = False  # Flip the image (augmentation).
+    blur_aug = False  # Blur patches of the image (augmentation).
+    scaling = False  # Scaling of patches (augmentation) will be applied if this is set to true.
+    binarization = False  # Otsu thresholding. Used for augmentation in binary cases such as textline prediction; should not be applied to multi-class cases.
+    dir_train = None  # Directory of the training dataset (sub-folders should be named images and labels).
+    dir_eval = None  # Directory of the validation dataset (sub-folders should be named images and labels).
+    dir_output = None  # Directory where the model should be saved.
+    pretraining = False  # Set to true to load pretrained weights of the resnet50 encoder.
+    scaling_bluring = False  # Combined scaling + blurring augmentation.
+    scaling_binarization = False  # Combined scaling + binarization augmentation.
+    scaling_flip = False  # Combined scaling + flipping augmentation.
+    thetha = [10, -10]  # Rotation angles in degrees for the rotation_not_90 augmentation.
+    blur_k = ['blur', 'guass', 'median']  # Blur kernels used for augmentation ('guass' is this codebase's key for Gaussian blur).
+    scales = [0.5, 2]  # Scale patches with these scales. Used for augmentation.
+    flip_index = [0, 1, -1]  # Flip image. Used for augmentation.
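
Note: the flip_index values above follow the flip-code convention of cv2.flip, to
which the augmentation code in utils.py passes them: 0 flips around the x-axis
(vertical flip), a positive code flips around the y-axis (horizontal flip), and a
negative code flips around both axes. A stand-alone sketch on a synthetic array
(not part of this patch):

    import numpy as np
    import cv2

    img = np.arange(12, dtype=np.uint8).reshape(3, 4)
    for f_i in [0, 1, -1]:
        # 0: vertical flip, 1: horizontal flip, -1: both
        print(f_i)
        print(cv2.flip(img, f_i))
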
+ continue_training = False # If index_start = 0 dir_of_start_model = '' is_loss_soft_dice = False weighted_loss = False data_is_provided = False + @ex.automain -def run(n_classes,n_epochs,input_height, - input_width,weight_decay,weighted_loss, - index_start,dir_of_start_model,is_loss_soft_dice, - n_batch,patches,augmentation,flip_aug - ,blur_aug,scaling, binarization, - blur_k,scales,dir_train,data_is_provided, - scaling_bluring,scaling_binarization,rotation, - rotation_not_90,thetha,scaling_flip,continue_training, - flip_index,dir_eval ,dir_output,pretraining,learning_rate): - - +def run(n_classes, n_epochs, input_height, + input_width, weight_decay, weighted_loss, + index_start, dir_of_start_model, is_loss_soft_dice, + n_batch, patches, augmentation, flip_aug, + blur_aug, scaling, binarization, + blur_k, scales, dir_train, data_is_provided, + scaling_bluring, scaling_binarization, rotation, + rotation_not_90, thetha, scaling_flip, continue_training, + flip_index, dir_eval, dir_output, pretraining, learning_rate): if data_is_provided: - dir_train_flowing=os.path.join(dir_output,'train') - dir_eval_flowing=os.path.join(dir_output,'eval') - - dir_flow_train_imgs=os.path.join(dir_train_flowing,'images') - dir_flow_train_labels=os.path.join(dir_train_flowing,'labels') - - dir_flow_eval_imgs=os.path.join(dir_eval_flowing,'images') - dir_flow_eval_labels=os.path.join(dir_eval_flowing,'labels') - + dir_train_flowing = os.path.join(dir_output, 'train') + dir_eval_flowing = os.path.join(dir_output, 'eval') + + dir_flow_train_imgs = os.path.join(dir_train_flowing, 'images') + dir_flow_train_labels = os.path.join(dir_train_flowing, 'labels') + + dir_flow_eval_imgs = os.path.join(dir_eval_flowing, 'images') + dir_flow_eval_labels = os.path.join(dir_eval_flowing, 'labels') + configuration() - + else: - dir_img,dir_seg=get_dirs_or_files(dir_train) - dir_img_val,dir_seg_val=get_dirs_or_files(dir_eval) - + dir_img, dir_seg = get_dirs_or_files(dir_train) + dir_img_val, dir_seg_val = get_dirs_or_files(dir_eval) + # make first a directory in output for both training and evaluations in order to flow data from these directories. 
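
Note: the block below materializes every (optionally augmented) training and
evaluation patch on disk so that data_gen can later stream them batch by batch.
Under the paths used in this patch, dir_output ends up with a layout along these
lines (illustrative only):

    dir_output/
        train/
            images/    img_0.png, img_1.png, ...
            labels/    img_0.png, img_1.png, ...
        eval/
            images/
            labels/

Each file in images/ has a same-named label image in labels/, which is what the
generator relies on when pairing inputs with masks.
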
- dir_train_flowing=os.path.join(dir_output,'train') - dir_eval_flowing=os.path.join(dir_output,'eval') - - dir_flow_train_imgs=os.path.join(dir_train_flowing,'images/') - dir_flow_train_labels=os.path.join(dir_train_flowing,'labels/') - - dir_flow_eval_imgs=os.path.join(dir_eval_flowing,'images/') - dir_flow_eval_labels=os.path.join(dir_eval_flowing,'labels/') - + dir_train_flowing = os.path.join(dir_output, 'train') + dir_eval_flowing = os.path.join(dir_output, 'eval') + + dir_flow_train_imgs = os.path.join(dir_train_flowing, 'images/') + dir_flow_train_labels = os.path.join(dir_train_flowing, 'labels/') + + dir_flow_eval_imgs = os.path.join(dir_eval_flowing, 'images/') + dir_flow_eval_labels = os.path.join(dir_eval_flowing, 'labels/') + if os.path.isdir(dir_train_flowing): - os.system('rm -rf '+dir_train_flowing) + os.system('rm -rf ' + dir_train_flowing) os.makedirs(dir_train_flowing) else: os.makedirs(dir_train_flowing) - + if os.path.isdir(dir_eval_flowing): - os.system('rm -rf '+dir_eval_flowing) + os.system('rm -rf ' + dir_eval_flowing) os.makedirs(dir_eval_flowing) else: os.makedirs(dir_eval_flowing) - os.mkdir(dir_flow_train_imgs) os.mkdir(dir_flow_train_labels) - + os.mkdir(dir_flow_eval_imgs) os.mkdir(dir_flow_eval_labels) - - - #set the gpu configuration - configuration() + # set the gpu configuration + configuration() - #writing patches into a sub-folder in order to be flowed from directory. - provide_patches(dir_img,dir_seg,dir_flow_train_imgs, + # writing patches into a sub-folder in order to be flowed from directory. + provide_patches(dir_img, dir_seg, dir_flow_train_imgs, dir_flow_train_labels, - input_height,input_width,blur_k,blur_aug, - flip_aug,binarization,scaling,scales,flip_index, - scaling_bluring,scaling_binarization,rotation, - rotation_not_90,thetha,scaling_flip, - augmentation=augmentation,patches=patches) - - provide_patches(dir_img_val,dir_seg_val,dir_flow_eval_imgs, + input_height, input_width, blur_k, blur_aug, + flip_aug, binarization, scaling, scales, flip_index, + scaling_bluring, scaling_binarization, rotation, + rotation_not_90, thetha, scaling_flip, + augmentation=augmentation, patches=patches) + + provide_patches(dir_img_val, dir_seg_val, dir_flow_eval_imgs, dir_flow_eval_labels, - input_height,input_width,blur_k,blur_aug, - flip_aug,binarization,scaling,scales,flip_index, - scaling_bluring,scaling_binarization,rotation, - rotation_not_90,thetha,scaling_flip, - augmentation=False,patches=patches) - - - + input_height, input_width, blur_k, blur_aug, + flip_aug, binarization, scaling, scales, flip_index, + scaling_bluring, scaling_binarization, rotation, + rotation_not_90, thetha, scaling_flip, + augmentation=False, patches=patches) + if weighted_loss: - weights=np.zeros(n_classes) + weights = np.zeros(n_classes) if data_is_provided: for obj in os.listdir(dir_flow_train_labels): try: - label_obj=cv2.imread(dir_flow_train_labels+'/'+obj) - label_obj_one_hot=get_one_hot( label_obj,label_obj.shape[0],label_obj.shape[1],n_classes) - weights+=(label_obj_one_hot.sum(axis=0)).sum(axis=0) + label_obj = cv2.imread(dir_flow_train_labels + '/' + obj) + label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes) + weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0) except: pass else: - + for obj in os.listdir(dir_seg): try: - label_obj=cv2.imread(dir_seg+'/'+obj) - label_obj_one_hot=get_one_hot( label_obj,label_obj.shape[0],label_obj.shape[1],n_classes) - weights+=(label_obj_one_hot.sum(axis=0)).sum(axis=0) + label_obj = 
cv2.imread(dir_seg + '/' + obj) + label_obj_one_hot = get_one_hot(label_obj, label_obj.shape[0], label_obj.shape[1], n_classes) + weights += (label_obj_one_hot.sum(axis=0)).sum(axis=0) except: pass - - - weights=1.00/weights - - weights=weights/float(np.sum(weights)) - weights=weights/float(np.min(weights)) - weights=weights/float(np.sum(weights)) - - - + + weights = 1.00 / weights + + weights = weights / float(np.sum(weights)) + weights = weights / float(np.min(weights)) + weights = weights / float(np.sum(weights)) + if continue_training: if is_loss_soft_dice: - model = load_model (dir_of_start_model, compile = True, custom_objects={'soft_dice_loss': soft_dice_loss}) + model = load_model(dir_of_start_model, compile=True, custom_objects={'soft_dice_loss': soft_dice_loss}) if weighted_loss: - model = load_model (dir_of_start_model, compile = True, custom_objects={'loss': weighted_categorical_crossentropy(weights)}) + model = load_model(dir_of_start_model, compile=True, + custom_objects={'loss': weighted_categorical_crossentropy(weights)}) if not is_loss_soft_dice and not weighted_loss: - model = load_model (dir_of_start_model, compile = True) + model = load_model(dir_of_start_model, compile=True) else: - #get our model. + # get our model. index_start = 0 - model = resnet50_unet(n_classes, input_height, input_width,weight_decay,pretraining) - - #if you want to see the model structure just uncomment model summary. - #model.summary() - + model = resnet50_unet(n_classes, input_height, input_width, weight_decay, pretraining) + + # if you want to see the model structure just uncomment model summary. + # model.summary() if not is_loss_soft_dice and not weighted_loss: model.compile(loss='categorical_crossentropy', - optimizer = Adam(lr=learning_rate),metrics=['accuracy']) - if is_loss_soft_dice: + optimizer=Adam(lr=learning_rate), metrics=['accuracy']) + if is_loss_soft_dice: model.compile(loss=soft_dice_loss, - optimizer = Adam(lr=learning_rate),metrics=['accuracy']) - + optimizer=Adam(lr=learning_rate), metrics=['accuracy']) + if weighted_loss: model.compile(loss=weighted_categorical_crossentropy(weights), - optimizer = Adam(lr=learning_rate),metrics=['accuracy']) - - #generating train and evaluation data - train_gen = data_gen(dir_flow_train_imgs,dir_flow_train_labels, batch_size = n_batch, - input_height=input_height, input_width=input_width,n_classes=n_classes ) - val_gen = data_gen(dir_flow_eval_imgs,dir_flow_eval_labels, batch_size = n_batch, - input_height=input_height, input_width=input_width,n_classes=n_classes ) - - for i in tqdm(range(index_start, n_epochs+index_start)): + optimizer=Adam(lr=learning_rate), metrics=['accuracy']) + + # generating train and evaluation data + train_gen = data_gen(dir_flow_train_imgs, dir_flow_train_labels, batch_size=n_batch, + input_height=input_height, input_width=input_width, n_classes=n_classes) + val_gen = data_gen(dir_flow_eval_imgs, dir_flow_eval_labels, batch_size=n_batch, + input_height=input_height, input_width=input_width, n_classes=n_classes) + + for i in tqdm(range(index_start, n_epochs + index_start)): model.fit_generator( train_gen, - steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs))/n_batch)-1, + steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs)) / n_batch) - 1, validation_data=val_gen, validation_steps=1, epochs=1) - model.save(dir_output+'/'+'model_'+str(i)+'.h5') - - - #os.system('rm -rf '+dir_train_flowing) - #os.system('rm -rf '+dir_eval_flowing) - - #model.save(dir_output+'/'+'model'+'.h5') - - - - - - - - + 
model.save(dir_output + '/' + 'model_' + str(i) + '.h5') + # os.system('rm -rf '+dir_train_flowing) + # os.system('rm -rf '+dir_eval_flowing) + # model.save(dir_output+'/'+'model'+'.h5') diff --git a/eynollah/eynollah/train/utils.py b/eynollah/eynollah/train/utils.py index 19ab46e..64263f4 100644 --- a/eynollah/eynollah/train/utils.py +++ b/eynollah/eynollah/train/utils.py @@ -10,18 +10,17 @@ import imutils import math - -def bluring(img_in,kind): - if kind=='guass': - img_blur = cv2.GaussianBlur(img_in,(5,5),0) - elif kind=="median": - img_blur = cv2.medianBlur(img_in,5) - elif kind=='blur': - img_blur=cv2.blur(img_in,(5,5)) +def bluring(img_in, kind): + if kind == 'guass': + img_blur = cv2.GaussianBlur(img_in, (5, 5), 0) + elif kind == "median": + img_blur = cv2.medianBlur(img_in, 5) + elif kind == 'blur': + img_blur = cv2.blur(img_in, (5, 5)) return img_blur -def elastic_transform(image, alpha, sigma,seedj, random_state=None): - + +def elastic_transform(image, alpha, sigma, seedj, random_state=None): """Elastic deformation of images as described in [Simard2003]_. .. [Simard2003] Simard, Steinkraus and Platt, "Best Practices for Convolutional Neural Networks applied to Visual Document Analysis", in @@ -37,461 +36,459 @@ def elastic_transform(image, alpha, sigma,seedj, random_state=None): dz = np.zeros_like(dx) x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2])) - indices = np.reshape(y+dy, (-1, 1)), np.reshape(x+dx, (-1, 1)), np.reshape(z, (-1, 1)) + indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z, (-1, 1)) distored_image = map_coordinates(image, indices, order=1, mode='reflect') return distored_image.reshape(image.shape) + def rotation_90(img): - img_rot=np.zeros((img.shape[1],img.shape[0],img.shape[2])) - img_rot[:,:,0]=img[:,:,0].T - img_rot[:,:,1]=img[:,:,1].T - img_rot[:,:,2]=img[:,:,2].T + img_rot = np.zeros((img.shape[1], img.shape[0], img.shape[2])) + img_rot[:, :, 0] = img[:, :, 0].T + img_rot[:, :, 1] = img[:, :, 1].T + img_rot[:, :, 2] = img[:, :, 2].T return img_rot + def rotatedRectWithMaxArea(w, h, angle): - """ + """ Given a rectangle of size wxh that has been rotated by 'angle' (in radians), computes the width and height of the largest possible axis-aligned rectangle (maximal area) within the rotated rectangle. 
""" - if w <= 0 or h <= 0: - return 0,0 - - width_is_longer = w >= h - side_long, side_short = (w,h) if width_is_longer else (h,w) - - # since the solutions for angle, -angle and 180-angle are all the same, - # if suffices to look at the first quadrant and the absolute values of sin,cos: - sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle)) - if side_short <= 2.*sin_a*cos_a*side_long or abs(sin_a-cos_a) < 1e-10: - # half constrained case: two crop corners touch the longer side, - # the other two corners are on the mid-line parallel to the longer line - x = 0.5*side_short - wr,hr = (x/sin_a,x/cos_a) if width_is_longer else (x/cos_a,x/sin_a) - else: - # fully constrained case: crop touches all 4 sides - cos_2a = cos_a*cos_a - sin_a*sin_a - wr,hr = (w*cos_a - h*sin_a)/cos_2a, (h*cos_a - w*sin_a)/cos_2a - - return wr,hr - -def rotate_max_area(image,rotated, rotated_label,angle): + if w <= 0 or h <= 0: + return 0, 0 + + width_is_longer = w >= h + side_long, side_short = (w, h) if width_is_longer else (h, w) + + # since the solutions for angle, -angle and 180-angle are all the same, + # if suffices to look at the first quadrant and the absolute values of sin,cos: + sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle)) + if side_short <= 2. * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10: + # half constrained case: two crop corners touch the longer side, + # the other two corners are on the mid-line parallel to the longer line + x = 0.5 * side_short + wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a) + else: + # fully constrained case: crop touches all 4 sides + cos_2a = cos_a * cos_a - sin_a * sin_a + wr, hr = (w * cos_a - h * sin_a) / cos_2a, (h * cos_a - w * sin_a) / cos_2a + + return wr, hr + + +def rotate_max_area(image, rotated, rotated_label, angle): """ image: cv2 image matrix object angle: in degree """ wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) h, w, _ = rotated.shape - y1 = h//2 - int(hr/2) + y1 = h // 2 - int(hr / 2) y2 = y1 + int(hr) - x1 = w//2 - int(wr/2) + x1 = w // 2 - int(wr / 2) x2 = x1 + int(wr) - return rotated[y1:y2, x1:x2],rotated_label[y1:y2, x1:x2] -def rotation_not_90_func(img,label,thetha): - rotated=imutils.rotate(img,thetha) - rotated_label=imutils.rotate(label,thetha) - return rotate_max_area(img, rotated,rotated_label,thetha) + return rotated[y1:y2, x1:x2], rotated_label[y1:y2, x1:x2] + + +def rotation_not_90_func(img, label, thetha): + rotated = imutils.rotate(img, thetha) + rotated_label = imutils.rotate(label, thetha) + return rotate_max_area(img, rotated, rotated_label, thetha) + def color_images(seg, n_classes): - ann_u=range(n_classes) - if len(np.shape(seg))==3: - seg=seg[:,:,0] - - seg_img=np.zeros((np.shape(seg)[0],np.shape(seg)[1],3)).astype(float) - colors=sns.color_palette("hls", n_classes) - + ann_u = range(n_classes) + if len(np.shape(seg)) == 3: + seg = seg[:, :, 0] + + seg_img = np.zeros((np.shape(seg)[0], np.shape(seg)[1], 3)).astype(float) + colors = sns.color_palette("hls", n_classes) + for c in ann_u: - c=int(c) - segl=(seg==c) - seg_img[:,:,0]+=segl*(colors[c][0]) - seg_img[:,:,1]+=segl*(colors[c][1]) - seg_img[:,:,2]+=segl*(colors[c][2]) + c = int(c) + segl = (seg == c) + seg_img[:, :, 0] += segl * (colors[c][0]) + seg_img[:, :, 1] += segl * (colors[c][1]) + seg_img[:, :, 2] += segl * (colors[c][2]) return seg_img - -def resize_image(seg_in,input_height,input_width): - return cv2.resize(seg_in,(input_width,input_height),interpolation=cv2.INTER_NEAREST) 
-def get_one_hot(seg,input_height,input_width,n_classes): - seg=seg[:,:,0] - seg_f=np.zeros((input_height, input_width,n_classes)) + +def resize_image(seg_in, input_height, input_width): + return cv2.resize(seg_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + + +def get_one_hot(seg, input_height, input_width, n_classes): + seg = seg[:, :, 0] + seg_f = np.zeros((input_height, input_width, n_classes)) for j in range(n_classes): - seg_f[:,:,j]=(seg==j).astype(int) + seg_f[:, :, j] = (seg == j).astype(int) return seg_f - -def IoU(Yi,y_predi): - ## mean Intersection over Union - ## Mean IoU = TP/(FN + TP + FP) + +def IoU(Yi, y_predi): + # mean Intersection over Union + # Mean IoU = TP/(FN + TP + FP) IoUs = [] - classes_true=np.unique(Yi) + classes_true = np.unique(Yi) for c in classes_true: - TP = np.sum( (Yi == c)&(y_predi==c) ) - FP = np.sum( (Yi != c)&(y_predi==c) ) - FN = np.sum( (Yi == c)&(y_predi != c)) - IoU = TP/float(TP + FP + FN) - print("class {:02.0f}: #TP={:6.0f}, #FP={:6.0f}, #FN={:5.0f}, IoU={:4.3f}".format(c,TP,FP,FN,IoU)) + TP = np.sum((Yi == c) & (y_predi == c)) + FP = np.sum((Yi != c) & (y_predi == c)) + FN = np.sum((Yi == c) & (y_predi != c)) + IoU = TP / float(TP + FP + FN) + print("class {:02.0f}: #TP={:6.0f}, #FP={:6.0f}, #FN={:5.0f}, IoU={:4.3f}".format(c, TP, FP, FN, IoU)) IoUs.append(IoU) mIoU = np.mean(IoUs) print("_________________") print("Mean IoU: {:4.3f}".format(mIoU)) return mIoU -def data_gen(img_folder, mask_folder, batch_size,input_height, input_width,n_classes): + + +def data_gen(img_folder, mask_folder, batch_size, input_height, input_width, n_classes): c = 0 - n = [f for f in os.listdir(img_folder) if not f.startswith('.')]# os.listdir(img_folder) #List of training images + n = [f for f in os.listdir(img_folder) if not f.startswith('.')] # os.listdir(img_folder) #List of training images random.shuffle(n) while True: img = np.zeros((batch_size, input_height, input_width, 3)).astype('float') mask = np.zeros((batch_size, input_height, input_width, n_classes)).astype('float') - - for i in range(c, c+batch_size): #initially from 0 to 16, c = 0. - #print(img_folder+'/'+n[i]) - + + for i in range(c, c + batch_size): # initially from 0 to 16, c = 0. + # print(img_folder+'/'+n[i]) + try: - filename=n[i].split('.')[0] - - train_img = cv2.imread(img_folder+'/'+n[i])/255. - train_img = cv2.resize(train_img, (input_width, input_height),interpolation=cv2.INTER_NEAREST)# Read an image from folder and resize - - img[i-c] = train_img #add to array - img[0], img[1], and so on. - train_mask = cv2.imread(mask_folder+'/'+filename+'.png') - #print(mask_folder+'/'+filename+'.png') - #print(train_mask.shape) - train_mask = get_one_hot( resize_image(train_mask,input_height,input_width),input_height,input_width,n_classes) - #train_mask = train_mask.reshape(224, 224, 1) # Add extra dimension for parity with train_img size [512 * 512 * 3] - - mask[i-c] = train_mask + filename = n[i].split('.')[0] + + train_img = cv2.imread(img_folder + '/' + n[i]) / 255. + train_img = cv2.resize(train_img, (input_width, input_height), + interpolation=cv2.INTER_NEAREST) # Read an image from folder and resize + + img[i - c] = train_img # add to array - img[0], img[1], and so on. 
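
Note on the IoU helper defined above: for every class present in the ground truth
it counts true positives, false positives and false negatives pixel-wise, computes
IoU = TP / (TP + FP + FN), and averages the per-class values. A tiny sanity check
(stand-alone sketch, not part of this patch):

    import numpy as np

    y_true = np.array([0, 0, 1, 1])
    y_pred = np.array([0, 1, 1, 1])
    # class 0: TP=1, FP=0, FN=1 -> IoU = 0.5
    # class 1: TP=2, FP=1, FN=0 -> IoU = 2/3
    for c in np.unique(y_true):
        TP = np.sum((y_true == c) & (y_pred == c))
        FP = np.sum((y_true != c) & (y_pred == c))
        FN = np.sum((y_true == c) & (y_pred != c))
        print(c, TP / float(TP + FP + FN))
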
+ train_mask = cv2.imread(mask_folder + '/' + filename + '.png') + # print(mask_folder+'/'+filename+'.png') + # print(train_mask.shape) + train_mask = get_one_hot(resize_image(train_mask, input_height, input_width), input_height, input_width, + n_classes) + # train_mask = train_mask.reshape(224, 224, 1) # Add extra dimension for parity with train_img size [512 * 512 * 3] + + mask[i - c] = train_mask except: - img[i-c] = np.ones((input_height, input_width, 3)).astype('float') - mask[i-c] = np.zeros((input_height, input_width, n_classes)).astype('float') - - - - c+=batch_size - if(c+batch_size>=len(os.listdir(img_folder))): - c=0 + img[i - c] = np.ones((input_height, input_width, 3)).astype('float') + mask[i - c] = np.zeros((input_height, input_width, n_classes)).astype('float') + + c += batch_size + if c + batch_size >= len(os.listdir(img_folder)): + c = 0 random.shuffle(n) yield img, mask - + + def otsu_copy(img): - img_r=np.zeros(img.shape) - img1=img[:,:,0] - img2=img[:,:,1] - img3=img[:,:,2] - _, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU) - _, threshold2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU) - _, threshold3 = cv2.threshold(img3, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU) - img_r[:,:,0]=threshold1 - img_r[:,:,1]=threshold1 - img_r[:,:,2]=threshold1 + img_r = np.zeros(img.shape) + img1 = img[:, :, 0] + img2 = img[:, :, 1] + img3 = img[:, :, 2] + _, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + _, threshold2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + _, threshold3 = cv2.threshold(img3, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + img_r[:, :, 0] = threshold1 + img_r[:, :, 1] = threshold1 + img_r[:, :, 2] = threshold1 return img_r -def get_patches(dir_img_f,dir_seg_f,img,label,height,width,indexer): - - if img.shape[0]int(nxf): - nxf=int(nxf)+1 - if nyf>int(nyf): - nyf=int(nyf)+1 - - nxf=int(nxf) - nyf=int(nyf) - + + +def get_patches(dir_img_f, dir_seg_f, img, label, height, width, indexer): + if img.shape[0] < height or img.shape[1] < width: + img, label = do_padding(img, label, height, width) + + img_h = img.shape[0] + img_w = img.shape[1] + + nxf = img_w / float(width) + nyf = img_h / float(height) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + if nyf > int(nyf): + nyf = int(nyf) + 1 + + nxf = int(nxf) + nyf = int(nyf) + for i in range(nxf): for j in range(nyf): - index_x_d=i*width - index_x_u=(i+1)*width - - index_y_d=j*height - index_y_u=(j+1)*height - - if index_x_u>img_w: - index_x_u=img_w - index_x_d=img_w-width - if index_y_u>img_h: - index_y_u=img_h - index_y_d=img_h-height - - - img_patch=img[index_y_d:index_y_u,index_x_d:index_x_u,:] - label_patch=label[index_y_d:index_y_u,index_x_d:index_x_u,:] - - cv2.imwrite(dir_img_f+'/img_'+str(indexer)+'.png', img_patch ) - cv2.imwrite(dir_seg_f+'/img_'+str(indexer)+'.png' , label_patch ) - indexer+=1 - + index_x_d = i * width + index_x_u = (i + 1) * width + + index_y_d = j * height + index_y_u = (j + 1) * height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - height + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + label_patch = label[index_y_d:index_y_u, index_x_d:index_x_u, :] + + cv2.imwrite(dir_img_f + '/img_' + str(indexer) + '.png', img_patch) + cv2.imwrite(dir_seg_f + '/img_' + str(indexer) + '.png', label_patch) + indexer += 1 + return indexer -def do_padding(img,label,height,width): - - 
height_new=img.shape[0] - width_new=img.shape[1] - - h_start=0 - w_start=0 - - if img.shape[0]int(nxf): - nxf=int(nxf)+1 - if nyf>int(nyf): - nyf=int(nyf)+1 - - nxf=int(nxf) - nyf=int(nyf) - + +def do_padding(img, label, height, width): + height_new = img.shape[0] + width_new = img.shape[1] + + h_start = 0 + w_start = 0 + + if img.shape[0] < height: + h_start = int(abs(height - img.shape[0]) / 2.) + height_new = height + + if img.shape[1] < width: + w_start = int(abs(width - img.shape[1]) / 2.) + width_new = width + + img_new = np.ones((height_new, width_new, img.shape[2])).astype(float) * 255 + label_new = np.zeros((height_new, width_new, label.shape[2])).astype(float) + + img_new[h_start:h_start + img.shape[0], w_start:w_start + img.shape[1], :] = np.copy(img[:, :, :]) + label_new[h_start:h_start + label.shape[0], w_start:w_start + label.shape[1], :] = np.copy(label[:, :, :]) + + return img_new, label_new + + +def get_patches_num_scale(dir_img_f, dir_seg_f, img, label, height, width, indexer, n_patches, scaler): + if img.shape[0] < height or img.shape[1] < width: + img, label = do_padding(img, label, height, width) + + img_h = img.shape[0] + img_w = img.shape[1] + + height_scale = int(height * scaler) + width_scale = int(width * scaler) + + nxf = img_w / float(width_scale) + nyf = img_h / float(height_scale) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + if nyf > int(nyf): + nyf = int(nyf) + 1 + + nxf = int(nxf) + nyf = int(nyf) + for i in range(nxf): for j in range(nyf): - index_x_d=i*width_scale - index_x_u=(i+1)*width_scale - - index_y_d=j*height_scale - index_y_u=(j+1)*height_scale - - if index_x_u>img_w: - index_x_u=img_w - index_x_d=img_w-width_scale - if index_y_u>img_h: - index_y_u=img_h - index_y_d=img_h-height_scale - - - img_patch=img[index_y_d:index_y_u,index_x_d:index_x_u,:] - label_patch=label[index_y_d:index_y_u,index_x_d:index_x_u,:] - - img_patch=resize_image(img_patch,height,width) - label_patch=resize_image(label_patch,height,width) - - cv2.imwrite(dir_img_f+'/img_'+str(indexer)+'.png', img_patch ) - cv2.imwrite(dir_seg_f+'/img_'+str(indexer)+'.png' , label_patch ) - indexer+=1 + index_x_d = i * width_scale + index_x_u = (i + 1) * width_scale + + index_y_d = j * height_scale + index_y_u = (j + 1) * height_scale + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - width_scale + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - height_scale + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + label_patch = label[index_y_d:index_y_u, index_x_d:index_x_u, :] + + img_patch = resize_image(img_patch, height, width) + label_patch = resize_image(label_patch, height, width) + + cv2.imwrite(dir_img_f + '/img_' + str(indexer) + '.png', img_patch) + cv2.imwrite(dir_seg_f + '/img_' + str(indexer) + '.png', label_patch) + indexer += 1 return indexer -def get_patches_num_scale_new(dir_img_f,dir_seg_f,img,label,height,width,indexer,scaler): - img=resize_image(img,int(img.shape[0]*scaler),int(img.shape[1]*scaler)) - label=resize_image(label,int(label.shape[0]*scaler),int(label.shape[1]*scaler)) - - if img.shape[0]int(nxf): - nxf=int(nxf)+1 - if nyf>int(nyf): - nyf=int(nyf)+1 - - nxf=int(nxf) - nyf=int(nyf) - + +def get_patches_num_scale_new(dir_img_f, dir_seg_f, img, label, height, width, indexer, scaler): + img = resize_image(img, int(img.shape[0] * scaler), int(img.shape[1] * scaler)) + label = resize_image(label, int(label.shape[0] * scaler), int(label.shape[1] * scaler)) + + if img.shape[0] < height or img.shape[1] < width: + img, label = 
do_padding(img, label, height, width) + + img_h = img.shape[0] + img_w = img.shape[1] + + height_scale = int(height * 1) + width_scale = int(width * 1) + + nxf = img_w / float(width_scale) + nyf = img_h / float(height_scale) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + if nyf > int(nyf): + nyf = int(nyf) + 1 + + nxf = int(nxf) + nyf = int(nyf) + for i in range(nxf): for j in range(nyf): - index_x_d=i*width_scale - index_x_u=(i+1)*width_scale - - index_y_d=j*height_scale - index_y_u=(j+1)*height_scale - - if index_x_u>img_w: - index_x_u=img_w - index_x_d=img_w-width_scale - if index_y_u>img_h: - index_y_u=img_h - index_y_d=img_h-height_scale - - - img_patch=img[index_y_d:index_y_u,index_x_d:index_x_u,:] - label_patch=label[index_y_d:index_y_u,index_x_d:index_x_u,:] - - #img_patch=resize_image(img_patch,height,width) - #label_patch=resize_image(label_patch,height,width) - - cv2.imwrite(dir_img_f+'/img_'+str(indexer)+'.png', img_patch ) - cv2.imwrite(dir_seg_f+'/img_'+str(indexer)+'.png' , label_patch ) - indexer+=1 + index_x_d = i * width_scale + index_x_u = (i + 1) * width_scale + + index_y_d = j * height_scale + index_y_u = (j + 1) * height_scale + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - width_scale + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - height_scale + + img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + label_patch = label[index_y_d:index_y_u, index_x_d:index_x_u, :] + + # img_patch=resize_image(img_patch,height,width) + # label_patch=resize_image(label_patch,height,width) + + cv2.imwrite(dir_img_f + '/img_' + str(indexer) + '.png', img_patch) + cv2.imwrite(dir_seg_f + '/img_' + str(indexer) + '.png', label_patch) + indexer += 1 return indexer -def provide_patches(dir_img,dir_seg,dir_flow_train_imgs, +def provide_patches(dir_img, dir_seg, dir_flow_train_imgs, dir_flow_train_labels, - input_height,input_width,blur_k,blur_aug, - flip_aug,binarization,scaling,scales,flip_index, - scaling_bluring,scaling_binarization,rotation, - rotation_not_90,thetha,scaling_flip, - augmentation=False,patches=False): - - imgs_cv_train=np.array(os.listdir(dir_img)) - segs_cv_train=np.array(os.listdir(dir_seg)) - - indexer=0 - for im, seg_i in tqdm(zip(imgs_cv_train,segs_cv_train)): - img_name=im.split('.')[0] + input_height, input_width, blur_k, blur_aug, + flip_aug, binarization, scaling, scales, flip_index, + scaling_bluring, scaling_binarization, rotation, + rotation_not_90, thetha, scaling_flip, + augmentation=False, patches=False): + imgs_cv_train = np.array(os.listdir(dir_img)) + segs_cv_train = np.array(os.listdir(dir_seg)) + + indexer = 0 + for im, seg_i in tqdm(zip(imgs_cv_train, segs_cv_train)): + img_name = im.split('.')[0] if not patches: - cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', resize_image(cv2.imread(dir_img+'/'+im),input_height,input_width ) ) - cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' , resize_image(cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width ) ) - indexer+=1 - + cv2.imwrite(dir_flow_train_imgs + '/img_' + str(indexer) + '.png', + resize_image(cv2.imread(dir_img + '/' + im), input_height, input_width)) + cv2.imwrite(dir_flow_train_labels + '/img_' + str(indexer) + '.png', + resize_image(cv2.imread(dir_seg + '/' + img_name + '.png'), input_height, input_width)) + indexer += 1 + if augmentation: if flip_aug: for f_i in flip_index: - cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', - 
resize_image(cv2.flip(cv2.imread(dir_img+'/'+im),f_i),input_height,input_width) ) - - cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' , - resize_image(cv2.flip(cv2.imread(dir_seg+'/'+img_name+'.png'),f_i),input_height,input_width) ) - indexer+=1 - - if blur_aug: + cv2.imwrite(dir_flow_train_imgs + '/img_' + str(indexer) + '.png', + resize_image(cv2.flip(cv2.imread(dir_img + '/' + im), f_i), input_height, + input_width)) + + cv2.imwrite(dir_flow_train_labels + '/img_' + str(indexer) + '.png', + resize_image(cv2.flip(cv2.imread(dir_seg + '/' + img_name + '.png'), f_i), + input_height, input_width)) + indexer += 1 + + if blur_aug: for blur_i in blur_k: - cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', - (resize_image(bluring(cv2.imread(dir_img+'/'+im),blur_i),input_height,input_width) ) ) - - cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' , - resize_image(cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width) ) - indexer+=1 - - + cv2.imwrite(dir_flow_train_imgs + '/img_' + str(indexer) + '.png', + (resize_image(bluring(cv2.imread(dir_img + '/' + im), blur_i), input_height, + input_width))) + + cv2.imwrite(dir_flow_train_labels + '/img_' + str(indexer) + '.png', + resize_image(cv2.imread(dir_seg + '/' + img_name + '.png'), input_height, + input_width)) + indexer += 1 + if binarization: - cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', - resize_image(otsu_copy( cv2.imread(dir_img+'/'+im)),input_height,input_width )) - - cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png', - resize_image( cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width )) - indexer+=1 - - - - - - + cv2.imwrite(dir_flow_train_imgs + '/img_' + str(indexer) + '.png', + resize_image(otsu_copy(cv2.imread(dir_img + '/' + im)), input_height, input_width)) + + cv2.imwrite(dir_flow_train_labels + '/img_' + str(indexer) + '.png', + resize_image(cv2.imread(dir_seg + '/' + img_name + '.png'), input_height, input_width)) + indexer += 1 + if patches: - - indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, - cv2.imread(dir_img+'/'+im),cv2.imread(dir_seg+'/'+img_name+'.png'), - input_height,input_width,indexer=indexer) - + + indexer = get_patches(dir_flow_train_imgs, dir_flow_train_labels, + cv2.imread(dir_img + '/' + im), cv2.imread(dir_seg + '/' + img_name + '.png'), + input_height, input_width, indexer=indexer) + if augmentation: - + if rotation: - - - indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, - rotation_90( cv2.imread(dir_img+'/'+im) ), - rotation_90( cv2.imread(dir_seg+'/'+img_name+'.png') ), - input_height,input_width,indexer=indexer) - + indexer = get_patches(dir_flow_train_imgs, dir_flow_train_labels, + rotation_90(cv2.imread(dir_img + '/' + im)), + rotation_90(cv2.imread(dir_seg + '/' + img_name + '.png')), + input_height, input_width, indexer=indexer) + if rotation_not_90: - + for thetha_i in thetha: - img_max_rotated,label_max_rotated=rotation_not_90_func(cv2.imread(dir_img+'/'+im),cv2.imread(dir_seg+'/'+img_name+'.png'),thetha_i) - indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, - img_max_rotated, - label_max_rotated, - input_height,input_width,indexer=indexer) + img_max_rotated, label_max_rotated = rotation_not_90_func(cv2.imread(dir_img + '/' + im), + cv2.imread( + dir_seg + '/' + img_name + '.png'), + thetha_i) + indexer = get_patches(dir_flow_train_imgs, dir_flow_train_labels, + img_max_rotated, + label_max_rotated, + input_height, input_width, indexer=indexer) if flip_aug: for f_i in 
flip_index: - indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, - cv2.flip( cv2.imread(dir_img+'/'+im) , f_i), - cv2.flip( cv2.imread(dir_seg+'/'+img_name+'.png') ,f_i), - input_height,input_width,indexer=indexer) - if blur_aug: + indexer = get_patches(dir_flow_train_imgs, dir_flow_train_labels, + cv2.flip(cv2.imread(dir_img + '/' + im), f_i), + cv2.flip(cv2.imread(dir_seg + '/' + img_name + '.png'), f_i), + input_height, input_width, indexer=indexer) + if blur_aug: for blur_i in blur_k: + indexer = get_patches(dir_flow_train_imgs, dir_flow_train_labels, + bluring(cv2.imread(dir_img + '/' + im), blur_i), + cv2.imread(dir_seg + '/' + img_name + '.png'), + input_height, input_width, indexer=indexer) - indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, - bluring( cv2.imread(dir_img+'/'+im) , blur_i), - cv2.imread(dir_seg+'/'+img_name+'.png'), - input_height,input_width,indexer=indexer) - - - if scaling: + if scaling: for sc_ind in scales: - indexer=get_patches_num_scale_new(dir_flow_train_imgs,dir_flow_train_labels, - cv2.imread(dir_img+'/'+im) , - cv2.imread(dir_seg+'/'+img_name+'.png'), - input_height,input_width,indexer=indexer,scaler=sc_ind) + indexer = get_patches_num_scale_new(dir_flow_train_imgs, dir_flow_train_labels, + cv2.imread(dir_img + '/' + im), + cv2.imread(dir_seg + '/' + img_name + '.png'), + input_height, input_width, indexer=indexer, scaler=sc_ind) if binarization: - indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, - otsu_copy( cv2.imread(dir_img+'/'+im)), - cv2.imread(dir_seg+'/'+img_name+'.png'), - input_height,input_width,indexer=indexer) - - - - if scaling_bluring: + indexer = get_patches(dir_flow_train_imgs, dir_flow_train_labels, + otsu_copy(cv2.imread(dir_img + '/' + im)), + cv2.imread(dir_seg + '/' + img_name + '.png'), + input_height, input_width, indexer=indexer) + + if scaling_bluring: for sc_ind in scales: for blur_i in blur_k: - indexer=get_patches_num_scale_new(dir_flow_train_imgs,dir_flow_train_labels, - bluring( cv2.imread(dir_img+'/'+im) , blur_i) , - cv2.imread(dir_seg+'/'+img_name+'.png') , - input_height,input_width,indexer=indexer,scaler=sc_ind) + indexer = get_patches_num_scale_new(dir_flow_train_imgs, dir_flow_train_labels, + bluring(cv2.imread(dir_img + '/' + im), blur_i), + cv2.imread(dir_seg + '/' + img_name + '.png'), + input_height, input_width, indexer=indexer, + scaler=sc_ind) - if scaling_binarization: + if scaling_binarization: for sc_ind in scales: - indexer=get_patches_num_scale_new(dir_flow_train_imgs,dir_flow_train_labels, - otsu_copy( cv2.imread(dir_img+'/'+im)) , - cv2.imread(dir_seg+'/'+img_name+'.png'), - input_height,input_width,indexer=indexer,scaler=sc_ind) - - if scaling_flip: + indexer = get_patches_num_scale_new(dir_flow_train_imgs, dir_flow_train_labels, + otsu_copy(cv2.imread(dir_img + '/' + im)), + cv2.imread(dir_seg + '/' + img_name + '.png'), + input_height, input_width, indexer=indexer, scaler=sc_ind) + + if scaling_flip: for sc_ind in scales: for f_i in flip_index: - indexer=get_patches_num_scale_new(dir_flow_train_imgs,dir_flow_train_labels, - cv2.flip( cv2.imread(dir_img+'/'+im) , f_i) , - cv2.flip(cv2.imread(dir_seg+'/'+img_name+'.png') ,f_i) , - input_height,input_width,indexer=indexer,scaler=sc_ind) - - - - - - - + indexer = get_patches_num_scale_new(dir_flow_train_imgs, dir_flow_train_labels, + cv2.flip(cv2.imread(dir_img + '/' + im), f_i), + cv2.flip(cv2.imread(dir_seg + '/' + img_name + '.png'), + f_i), + input_height, input_width, indexer=indexer, + scaler=sc_ind) diff --git 
a/eynollah/eynollah/utils/contour.py b/eynollah/eynollah/utils/contour.py index 53b39b5..95a1199 100644 --- a/eynollah/eynollah/utils/contour.py +++ b/eynollah/eynollah/utils/contour.py @@ -5,6 +5,8 @@ from shapely import geometry from .rotate import rotate_image, rotation_image_new from multiprocessing import Process, Queue, cpu_count from multiprocessing import Pool + + def contours_in_same_horizon(cy_main_hor): X1 = np.zeros((len(cy_main_hor), len(cy_main_hor))) X2 = np.zeros((len(cy_main_hor), len(cy_main_hor))) @@ -22,6 +24,7 @@ def contours_in_same_horizon(cy_main_hor): all_args.append(list(set(list_h))) return np.unique(np.array(all_args, dtype=object)) + def find_contours_mean_y_diff(contours_main): M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] @@ -42,10 +45,11 @@ def get_text_region_boxes_by_given_contours(contours): del contours return boxes, contours_new + def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): found_polygons_early = list() - for jv,c in enumerate(contours): + for jv, c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -55,17 +59,18 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint)) return found_polygons_early + def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): found_polygons_early = list() - for jv,c in enumerate(contours): + for jv, c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue polygon = geometry.Polygon([point[0] for point in c]) # area = cv2.contourArea(c) area = polygon.area - ##print(np.prod(thresh.shape[:2])) + # print(np.prod(thresh.shape[:2])) # Check that polygon has area greater than minimal area # print(hierarchy[0][jv][3],hierarchy ) if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]): # and hierarchy[0][jv][3]==-1 : @@ -73,6 +78,7 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32)) return found_polygons_early + def find_new_features_of_contours(contours_main): areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) @@ -107,25 +113,27 @@ def find_new_features_of_contours(contours_main): # dis_x=np.abs(x_max_main-x_min_main) return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin -def find_features_of_contours(contours_main): - - areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) - M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] - cx_main=[(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] - cy_main=[(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] - x_min_main=np.array([np.min(contours_main[j][:,0,0]) for j in range(len(contours_main))]) - x_max_main=np.array([np.max(contours_main[j][:,0,0]) for j in range(len(contours_main))]) - y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) - y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) +def find_features_of_contours(contours_main): + areas_main = 
np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) + M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] + cx_main = [(M_main[j]['m10']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] + cy_main = [(M_main[j]['m01']/(M_main[j]['m00']+1e-32)) for j in range(len(M_main))] + x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) + x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))]) + + y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) + y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))]) - return y_min_main, y_max_main + + def return_parent_contours(contours, hierarchy): contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1] return contours_parent + def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): # pixels of images are identified by 5 @@ -145,6 +153,7 @@ def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002): return contours_imgs + def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, indexes_r_con_per_pro, img, slope_first): cnts_org_per_each_subprocess = [] index_by_text_region_contours = [] @@ -165,10 +174,9 @@ def do_work_of_contours_in_image(queue_of_all_params, contours_per_process, inde cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) - cnts_org_per_each_subprocess.append(cont_int[0]) - queue_of_all_params.put([ cnts_org_per_each_subprocess, index_by_text_region_contours]) + queue_of_all_params.put([cnts_org_per_each_subprocess, index_by_text_region_contours]) def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): @@ -180,10 +188,10 @@ def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): nh = np.linspace(0, len(cnts), num_cores + 1) indexes_by_text_con = np.array(range(len(cnts))) for i in range(num_cores): - contours_per_process = cnts[int(nh[i]) : int(nh[i + 1])] - indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] + contours_per_process = cnts[int(nh[i]): int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]): int(nh[i + 1])] - processes.append(Process(target=do_work_of_contours_in_image, args=(queue_of_all_params, contours_per_process, indexes_text_con_per_process, img,slope_first ))) + processes.append(Process(target=do_work_of_contours_in_image, args=(queue_of_all_params, contours_per_process, indexes_text_con_per_process, img, slope_first))) for i in range(num_cores): processes[i].start() cnts_org = [] @@ -200,7 +208,9 @@ def get_textregion_contours_in_org_image_multi(cnts, img, slope_first): print(all_index_text_con) return cnts_org -def loop_contour_image(index_l, cnts,img, slope_first): + + +def loop_contour_image(index_l, cnts, img, slope_first): img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[cnts[index_l]], color=(1, 1, 1)) @@ -209,7 +219,7 @@ def loop_contour_image(index_l, cnts,img, slope_first): # print(img.shape,'img') img_copy = rotation_image_new(img_copy, -slope_first) - ##print(img_copy.shape,'img_copy') + # print(img_copy.shape,'img_copy') # plt.imshow(img_copy) # plt.show() @@ -224,15 +234,17 @@ def loop_contour_image(index_l, cnts,img, slope_first): # print(np.shape(cont_int[0])) return 
cont_int[0] + def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first): cnts_org = [] # print(cnts,'cnts') with Pool(cpu_count()) as p: - cnts_org = p.starmap(loop_contour_image, [(index_l,cnts, img,slope_first) for index_l in range(len(cnts))]) + cnts_org = p.starmap(loop_contour_image, [(index_l, cnts, img, slope_first) for index_l in range(len(cnts))]) return cnts_org + def get_textregion_contours_in_org_image(cnts, img, slope_first): cnts_org = [] @@ -246,7 +258,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): # print(img.shape,'img') img_copy = rotation_image_new(img_copy, -slope_first) - ##print(img_copy.shape,'img_copy') + # print(img_copy.shape,'img_copy') # plt.imshow(img_copy) # plt.show() @@ -263,17 +275,18 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): return cnts_org + def get_textregion_contours_in_org_image_light(cnts, img, slope_first): h_o = img.shape[0] w_o = img.shape[1] img = cv2.resize(img, (int(img.shape[1]/3.), int(img.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) - #cnts = cnts/2 - cnts = [(i/ 3).astype(np.int32) for i in cnts] + # cnts = list( (np.array(cnts)/2).astype(np.int16) ) + # cnts = cnts/2 + cnts = [(i / 3).astype(np.int32) for i in cnts] cnts_org = [] - #print(cnts,'cnts') + # print(cnts,'cnts') for i in range(len(cnts)): img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) @@ -283,7 +296,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first): # print(img.shape,'img') img_copy = rotation_image_new(img_copy, -slope_first) - ##print(img_copy.shape,'img_copy') + # print(img_copy.shape,'img_copy') # plt.imshow(img_copy) # plt.show() @@ -300,6 +313,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first): return cnts_org + def return_contours_of_interested_textline(region_pre_p, pixel): # pixels of images are identified by 5 @@ -317,6 +331,7 @@ def return_contours_of_interested_textline(region_pre_p, pixel): contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hierarchy, max_area=1, min_area=0.000000003) return contours_imgs + def return_contours_of_image(image): if len(image.shape) == 2: @@ -329,6 +344,7 @@ def return_contours_of_image(image): contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) return contours, hierarchy + def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003): # pixels of images are identified by 5 @@ -348,6 +364,7 @@ def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_si return contours_imgs + def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area): # pixels of images are identified by 5 @@ -367,4 +384,3 @@ def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3)) img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1)) return img_ret[:, :, 0] - diff --git a/eynollah/eynollah/utils/counter.py b/eynollah/eynollah/utils/counter.py index 9a3ed70..ac32dc9 100644 --- a/eynollah/eynollah/utils/counter.py +++ b/eynollah/eynollah/utils/counter.py @@ -3,6 +3,7 @@ from collections import Counter REGION_ID_TEMPLATE = 'region_%04d' LINE_ID_TEMPLATE = 'region_%04d_line_%04d' + class EynollahIdCounter(): def __init__(self, region_idx=0, line_idx=0): diff --git a/eynollah/eynollah/utils/drop_capitals.py 
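# [editorial sketch, not part of the patch] The contour.py changes above are
# whitespace/PEP8 cleanups around one core idea: a contour is kept only if its
# polygon area, as a fraction of the whole image area, lies within
# [min_area, max_area]. A self-contained sketch of that filter (the helper
# name is illustrative, not eynollah's own):
import numpy as np
from shapely import geometry

def filter_by_relative_area(image_shape, contours, min_area=0.00001, max_area=1.0):
    total = float(np.prod(image_shape[:2]))
    kept = []
    for c in contours:
        if len(c) < 3:  # a polygon cannot have fewer than 3 points
            continue
        polygon = geometry.Polygon([point[0] for point in c])
        if min_area * total <= polygon.area <= max_area * total:
            kept.append(c)
    return kept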
b/eynollah/eynollah/utils/drop_capitals.py index e12028f..d464c63 100644 --- a/eynollah/eynollah/utils/drop_capitals.py +++ b/eynollah/eynollah/utils/drop_capitals.py @@ -6,6 +6,7 @@ from .contour import ( return_parent_contours, ) + def adhere_drop_capital_region_into_corresponding_textline( text_regions_p, polygons_of_drop_capitals, @@ -26,7 +27,7 @@ def adhere_drop_capital_region_into_corresponding_textline( img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3)) for j_cont in range(len(contours_only_text_parent)): - img_con_all[all_box_coord[j_cont][0] : all_box_coord[j_cont][1], all_box_coord[j_cont][2] : all_box_coord[j_cont][3], 0] = (j_cont + 1) * 3 + img_con_all[all_box_coord[j_cont][0]: all_box_coord[j_cont][1], all_box_coord[j_cont][2]: all_box_coord[j_cont][3], 0] = (j_cont + 1) * 3 # img_con_all=cv2.fillPoly(img_con_all,pts=[contours_only_text_parent[j_cont]],color=((j_cont+1)*3,(j_cont+1)*3,(j_cont+1)*3)) # plt.imshow(img_con_all[:,:,0]) @@ -44,7 +45,7 @@ def adhere_drop_capital_region_into_corresponding_textline( # plt.imshow(img_con[:,:,0]) # plt.show() - ##img_con=cv2.dilate(img_con, kernel, iterations=30) + # img_con=cv2.dilate(img_con, kernel, iterations=30) # plt.imshow(img_con[:,:,0]) # plt.show() @@ -185,7 +186,7 @@ def adhere_drop_capital_region_into_corresponding_textline( # contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0] # print(np.shape(contours_biggest),'contours_biggest') # print(np.shape(all_found_textline_polygons[int(region_final)][arg_min])) - ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) + # contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest except: pass @@ -230,7 +231,7 @@ def adhere_drop_capital_region_into_corresponding_textline( contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] # -all_box_coord[int(region_final)][2] contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0] - ##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) + # contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2]) all_found_textline_polygons[int(region_final)][arg_min] = contours_biggest # all_found_textline_polygons[int(region_final)][arg_min]=contours_biggest @@ -239,49 +240,49 @@ def adhere_drop_capital_region_into_corresponding_textline( else: pass - ##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) - ###print(all_box_coord[j_cont]) - ###print(cx_t) - ###print(cy_t) - ###print(cx_d[i_drop]) - ###print(cy_d[i_drop]) - ##y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t) + # cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[int(region_final)]) + # ##print(all_box_coord[j_cont]) + # ##print(cx_t) + # ##print(cy_t) + # ##print(cx_d[i_drop]) + # ##print(cy_d[i_drop]) + # #y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t) - ##y_lines[y_lines 1: @@ -399,71 +400,72 @@ def adhere_drop_capital_region_into_corresponding_textline( else: pass - #####for i_drop in range(len(polygons_of_drop_capitals)): - #####for j_cont in range(len(contours_only_text_parent)): - #####img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3)) - #####img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] 
],color=(255,255,255)) - #####img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255)) - - #####img_con=img_con.astype(np.uint8) - ######imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY) - ######ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - ######contours_new,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - - #####contours_new,hir_new=return_contours_of_image(img_con) - #####contours_new_parent=return_parent_contours( contours_new,hir_new) - ######plt.imshow(img_con) - ######plt.show() - #####try: - #####if len(contours_new_parent)==1: - ######print(all_found_textline_polygons[j_cont][0]) - #####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contours(all_found_textline_polygons[j_cont]) - ######print(all_box_coord[j_cont]) - ######print(cx_t) - ######print(cy_t) - ######print(cx_d[i_drop]) - ######print(cy_d[i_drop]) - #####y_lines=all_box_coord[j_cont][0]+np.array(cy_t) - - ######print(y_lines) - - #####arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) ) - ######print(arg_min) - - #####cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min]) - #####cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2] - #####cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0] - - #####img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3)) - #####img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255)) - #####img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255)) - - #####img_textlines=img_textlines.astype(np.uint8) - #####imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) - #####ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - #####contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - - #####areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) - - #####contours_biggest=contours_combined[np.argmax(areas_cnt_text)] - - ######print(np.shape(contours_biggest)) - ######print(contours_biggest[:]) - #####contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2] - #####contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0] - - #####all_found_textline_polygons[j_cont][arg_min]=contours_biggest - ######print(contours_biggest) - ######plt.imshow(img_textlines[:,:,0]) - ######plt.show() - #####else: - #####pass - #####except: - #####pass + # ####for i_drop in range(len(polygons_of_drop_capitals)): + # ####for j_cont in range(len(contours_only_text_parent)): + # ####img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3)) + # ####img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255)) + # ####img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255)) + + # ####img_con=img_con.astype(np.uint8) + # #####imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY) + # #####ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + # #####contours_new,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + + # ####contours_new,hir_new=return_contours_of_image(img_con) + # ####contours_new_parent=return_parent_contours( contours_new,hir_new) + # #####plt.imshow(img_con) + # #####plt.show() + # ####try: + # ####if len(contours_new_parent)==1: + # #####print(all_found_textline_polygons[j_cont][0]) + # ####cx_t,cy_t ,_, _, _ ,_,_= 
find_new_features_of_contours(all_found_textline_polygons[j_cont]) + # #####print(all_box_coord[j_cont]) + # #####print(cx_t) + # #####print(cy_t) + # #####print(cx_d[i_drop]) + # #####print(cy_d[i_drop]) + # ####y_lines=all_box_coord[j_cont][0]+np.array(cy_t) + + # #####print(y_lines) + + # ####arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) ) + # #####print(arg_min) + + # ####cnt_nearest=np.copy(all_found_textline_polygons[j_cont][arg_min]) + # ####cnt_nearest[:,0]=all_found_textline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2] + # ####cnt_nearest[:,1]=all_found_textline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0] + + # ####img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3)) + # ####img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255)) + # ####img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255)) + + # ####img_textlines=img_textlines.astype(np.uint8) + # ####imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY) + # ####ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + # ####contours_combined,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) + + # ####areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))]) + + # ####contours_biggest=contours_combined[np.argmax(areas_cnt_text)] + + # #####print(np.shape(contours_biggest)) + # #####print(contours_biggest[:]) + # ####contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2] + # ####contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0] + + # ####all_found_textline_polygons[j_cont][arg_min]=contours_biggest + # #####print(contours_biggest) + # #####plt.imshow(img_textlines[:,:,0]) + # #####plt.show() + # ####else: + # ####pass + # ####except: + # ####pass return all_found_textline_polygons + def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1): drop_only = (layout_no_patch[:, :, 0] == 4) * 1 @@ -489,7 +491,7 @@ def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1): if iou_of_box_and_contoure > 60 and weigh_to_height_ratio < 1.2 and height_to_weight_ratio < 2: map_of_drop_contour_bb = np.zeros((layout1.shape[0], layout1.shape[1])) - map_of_drop_contour_bb[y : y + h, x : x + w] = layout1[y : y + h, x : x + w] + map_of_drop_contour_bb[y: y + h, x: x + w] = layout1[y: y + h, x: x + w] if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15: contours_drop_parent_final.append(contours_drop_parent[jj]) @@ -499,4 +501,3 @@ def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1): layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4)) return layout_no_patch - diff --git a/eynollah/eynollah/utils/marginals.py b/eynollah/eynollah/utils/marginals.py index 7c43de6..d7514bb 100644 --- a/eynollah/eynollah/utils/marginals.py +++ b/eynollah/eynollah/utils/marginals.py @@ -3,250 +3,226 @@ import cv2 from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d - from .contour import find_new_features_of_contours, return_contours_of_interested_region from .resize import resize_image from .rotate import rotate_image -def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None): - mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1])) - mask_marginals=mask_marginals.astype(np.uint8) +def 
get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None): + mask_marginals = np.zeros((text_with_lines.shape[0], text_with_lines.shape[1])) + mask_marginals = mask_marginals.astype(np.uint8) - text_with_lines=text_with_lines.astype(np.uint8) - ##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3) + text_with_lines = text_with_lines.astype(np.uint8) + # text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3) - text_with_lines_eroded=cv2.erode(text_with_lines,kernel,iterations=5) + text_with_lines_eroded = cv2.erode(text_with_lines, kernel, iterations=5) - if text_with_lines.shape[0]<=1500: + if text_with_lines.shape[0] <= 1500: pass - elif text_with_lines.shape[0]>1500 and text_with_lines.shape[0]<=1800: - text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1]) - text_with_lines=cv2.erode(text_with_lines,kernel,iterations=5) - text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) + elif text_with_lines.shape[0] > 1500 and text_with_lines.shape[0] <= 1800: + text_with_lines = resize_image(text_with_lines, int(text_with_lines.shape[0] * 1.5), text_with_lines.shape[1]) + text_with_lines = cv2.erode(text_with_lines, kernel, iterations=5) + text_with_lines = resize_image(text_with_lines, text_with_lines_eroded.shape[0], + text_with_lines_eroded.shape[1]) else: - text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1]) - text_with_lines=cv2.erode(text_with_lines,kernel,iterations=7) - text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1]) - + text_with_lines = resize_image(text_with_lines, int(text_with_lines.shape[0] * 1.8), text_with_lines.shape[1]) + text_with_lines = cv2.erode(text_with_lines, kernel, iterations=7) + text_with_lines = resize_image(text_with_lines, text_with_lines_eroded.shape[0], + text_with_lines_eroded.shape[1]) - text_with_lines_y=text_with_lines.sum(axis=0) - text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0) + text_with_lines_y = text_with_lines.sum(axis=0) + text_with_lines_y_eroded = text_with_lines_eroded.sum(axis=0) - thickness_along_y_percent=text_with_lines_y_eroded.max()/(float(text_with_lines.shape[0]))*100 + thickness_along_y_percent = text_with_lines_y_eroded.max() / (float(text_with_lines.shape[0])) * 100 - #print(thickness_along_y_percent,'thickness_along_y_percent') + # print(thickness_along_y_percent,'thickness_along_y_percent') - if thickness_along_y_percent<30: - min_textline_thickness=8 - elif thickness_along_y_percent>=30 and thickness_along_y_percent<50: - min_textline_thickness=20 + if thickness_along_y_percent < 30: + min_textline_thickness = 8 + elif thickness_along_y_percent >= 30 and thickness_along_y_percent < 50: + min_textline_thickness = 20 else: - min_textline_thickness=40 - - - - if thickness_along_y_percent>=14: - - text_with_lines_y_rev=-1*text_with_lines_y[:] - #print(text_with_lines_y) - #print(text_with_lines_y_rev) - - - - - #plt.plot(text_with_lines_y) - #plt.show() - - - text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev) - - #plt.plot(text_with_lines_y_rev) - #plt.show() - sigma_gaus=1 - region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus) + min_textline_thickness = 40 - region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) + if thickness_along_y_percent >= 14: - #plt.plot(region_sum_0_rev) - #plt.show() - 
region_sum_0_updown=region_sum_0[len(region_sum_0)::-1] + text_with_lines_y_rev = -1 * text_with_lines_y[:] + # print(text_with_lines_y) + # print(text_with_lines_y_rev) - first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None)) - last_nonzero=(next((i for i, x in enumerate(region_sum_0_updown) if x), None)) + # plt.plot(text_with_lines_y) + # plt.show() + text_with_lines_y_rev = text_with_lines_y_rev - np.min(text_with_lines_y_rev) - last_nonzero=len(region_sum_0)-last_nonzero + # plt.plot(text_with_lines_y_rev) + # plt.show() + sigma_gaus = 1 + region_sum_0 = gaussian_filter1d(text_with_lines_y, sigma_gaus) - ##img_sum_0_smooth_rev=-region_sum_0 + region_sum_0_rev = gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) + # plt.plot(region_sum_0_rev) + # plt.show() + region_sum_0_updown = region_sum_0[len(region_sum_0)::-1] - mid_point=(last_nonzero+first_nonzero)/2. + first_nonzero = (next((i for i, x in enumerate(region_sum_0) if x), None)) + last_nonzero = (next((i for i, x in enumerate(region_sum_0_updown) if x), None)) + last_nonzero = len(region_sum_0) - last_nonzero - one_third_right=(last_nonzero-mid_point)/3.0 - one_third_left=(mid_point-first_nonzero)/3.0 - - #img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev) + # img_sum_0_smooth_rev=-region_sum_0 + mid_point = (last_nonzero + first_nonzero) / 2. + one_third_right = (last_nonzero - mid_point) / 3.0 + one_third_left = (mid_point - first_nonzero) / 3.0 + # img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev) peaks, _ = find_peaks(text_with_lines_y_rev, height=0) + peaks = np.array(peaks) - peaks=np.array(peaks) - - - #print(region_sum_0[peaks]) - ##plt.plot(region_sum_0) - ##plt.plot(peaks,region_sum_0[peaks],'*') - ##plt.show() - #print(first_nonzero,last_nonzero,peaks) - peaks=peaks[(peaks>first_nonzero) & ((peaks first_nonzero) & (peaks < last_nonzero)] + # print(first_nonzero,last_nonzero,peaks) - #print(region_sum_0[peaks]<10) - ####peaks=peaks[region_sum_0[peaks]<25 ] + # print(region_sum_0[peaks]<10) + # ###peaks=peaks[region_sum_0[peaks]<25 ] - #print(region_sum_0[peaks]) - peaks=peaks[region_sum_0[peaks]mid_point] - peaks_left=peaks[peaks(mid_point+one_third_right)] - peaks_left=peaks[peaks<(mid_point-one_third_left)] + # print(region_sum_0[peaks]) + peaks = peaks[region_sum_0[peaks] < min_textline_thickness] + # print(peaks) + # print(first_nonzero,last_nonzero,one_third_right,one_third_left) + if num_col == 1: + peaks_right = peaks[peaks > mid_point] + peaks_left = peaks[peaks < mid_point] + if num_col == 2: + peaks_right = peaks[peaks > (mid_point + one_third_right)] + peaks_left = peaks[peaks < (mid_point - one_third_left)] try: - point_right=np.min(peaks_right) + point_right = np.min(peaks_right) except: - point_right=last_nonzero - + point_right = last_nonzero try: - point_left=np.max(peaks_left) + point_left = np.max(peaks_left) except: - point_left=first_nonzero - + point_left = first_nonzero - - - #print(point_left,point_right) - #print(text_regions.shape) - if point_right>=mask_marginals.shape[1]: - point_right=mask_marginals.shape[1]-1 + # print(point_left,point_right) + # print(text_regions.shape) + if point_right >= mask_marginals.shape[1]: + point_right = mask_marginals.shape[1] - 1 try: - mask_marginals[:,point_left:point_right]=1 + mask_marginals[:, point_left:point_right] = 1 except: - mask_marginals[:,:]=1 + mask_marginals[:, :] = 1 - #print(mask_marginals.shape,point_left,point_right,'nadosh') - 
mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew) + # print(mask_marginals.shape,point_left,point_right,'nadosh') + mask_marginals_rotated = rotate_image(mask_marginals, -slope_deskew) - #print(mask_marginals_rotated.shape,'nadosh') - mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0) + # print(mask_marginals_rotated.shape,'nadosh') + mask_marginals_rotated_sum = mask_marginals_rotated.sum(axis=0) - mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1 - index_x=np.array(range(len(mask_marginals_rotated_sum)))+1 + mask_marginals_rotated_sum[mask_marginals_rotated_sum != 0] = 1 + index_x = np.array(range(len(mask_marginals_rotated_sum))) + 1 - index_x_interest=index_x[mask_marginals_rotated_sum==1] + index_x_interest = index_x[mask_marginals_rotated_sum == 1] - min_point_of_left_marginal=np.min(index_x_interest)-16 - max_point_of_right_marginal=np.max(index_x_interest)+16 + min_point_of_left_marginal = np.min(index_x_interest) - 16 + max_point_of_right_marginal = np.max(index_x_interest) + 16 - if min_point_of_left_marginal<0: - min_point_of_left_marginal=0 - if max_point_of_right_marginal>=text_regions.shape[1]: - max_point_of_right_marginal=text_regions.shape[1]-1 + if min_point_of_left_marginal < 0: + min_point_of_left_marginal = 0 + if max_point_of_right_marginal >= text_regions.shape[1]: + max_point_of_right_marginal = text_regions.shape[1] - 1 + # print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew') + # print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated') + # plt.imshow(mask_marginals) + # plt.show() - #print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew') - #print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated') - #plt.imshow(mask_marginals) - #plt.show() + # plt.imshow(mask_marginals_rotated) + # plt.show() - #plt.imshow(mask_marginals_rotated) - #plt.show() + text_regions[(mask_marginals_rotated[:, :] != 1) & (text_regions[:, :] == 1)] = 4 - text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4 + # plt.imshow(text_regions) + # plt.show() - #plt.imshow(text_regions) - #plt.show() + pixel_img = 4 + min_area_text = 0.00001 + polygons_of_marginals = return_contours_of_interested_region(text_regions, pixel_img, min_area_text) - pixel_img=4 - min_area_text=0.00001 - polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text) + cx_text_only, cy_text_only, x_min_text_only, x_max_text_only, y_min_text_only, y_max_text_only, y_cor_x_min_main = find_new_features_of_contours( + polygons_of_marginals) - cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contours(polygons_of_marginals) + text_regions[(text_regions[:, :] == 4)] = 1 - text_regions[(text_regions[:,:]==4)]=1 + marginlas_should_be_main_text = [] - marginlas_should_be_main_text=[] - - x_min_marginals_left=[] - x_min_marginals_right=[] + x_min_marginals_left = [] + x_min_marginals_right = [] for i in range(len(cx_text_only)): - x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) - y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) - #print(x_width_mar,y_height_mar,y_height_mar/x_width_mar,'y_height_mar') - if x_width_mar>16 and y_height_mar/x_width_mar<18: + x_width_mar = abs(x_min_text_only[i] - x_max_text_only[i]) + y_height_mar = abs(y_min_text_only[i] - y_max_text_only[i]) + # print(x_width_mar,y_height_mar,y_height_mar/x_width_mar,'y_height_mar') + if x_width_mar > 16 and 
y_height_mar / x_width_mar < 18: marginlas_should_be_main_text.append(polygons_of_marginals[i]) - if x_min_text_only[i]<(mid_point-one_third_left): - x_min_marginals_left_new=x_min_text_only[i] - if len(x_min_marginals_left)==0: + if x_min_text_only[i] < (mid_point - one_third_left): + x_min_marginals_left_new = x_min_text_only[i] + if len(x_min_marginals_left) == 0: x_min_marginals_left.append(x_min_marginals_left_new) else: - x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new) + x_min_marginals_left[0] = min(x_min_marginals_left[0], x_min_marginals_left_new) else: - x_min_marginals_right_new=x_min_text_only[i] - if len(x_min_marginals_right)==0: + x_min_marginals_right_new = x_min_text_only[i] + if len(x_min_marginals_right) == 0: x_min_marginals_right.append(x_min_marginals_right_new) else: - x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new) - - if len(x_min_marginals_left)==0: - x_min_marginals_left=[0] - if len(x_min_marginals_right)==0: - x_min_marginals_right=[text_regions.shape[1]-1] - - - + x_min_marginals_right[0] = min(x_min_marginals_right[0], x_min_marginals_right_new) - #print(x_min_marginals_left[0],x_min_marginals_right[0],'margo') + if len(x_min_marginals_left) == 0: + x_min_marginals_left = [0] + if len(x_min_marginals_right) == 0: + x_min_marginals_right = [text_regions.shape[1] - 1] - #print(marginlas_should_be_main_text,'marginlas_should_be_main_text') - text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) + # print(x_min_marginals_left[0],x_min_marginals_right[0],'margo') - #print(np.unique(text_regions)) + # print(marginlas_should_be_main_text,'marginlas_should_be_main_text') + text_regions = cv2.fillPoly(text_regions, pts=marginlas_should_be_main_text, color=(4, 4)) - #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 - #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 + # print(np.unique(text_regions)) - text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 - text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 + # text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 + # text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - ###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4 + text_regions[:, :int(min_point_of_left_marginal)][text_regions[:, :int(min_point_of_left_marginal)] == 1] = 0 + text_regions[:, int(max_point_of_right_marginal):][text_regions[:, int(max_point_of_right_marginal):] == 1] = 0 - ###text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4 - #plt.plot(region_sum_0) - #plt.plot(peaks,region_sum_0[peaks],'*') - #plt.show() + # ##text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4 + # ##text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4 + # plt.plot(region_sum_0) + # plt.plot(peaks,region_sum_0[peaks],'*') + # plt.show() - #plt.imshow(text_regions) - #plt.show() + # plt.imshow(text_regions) + # plt.show() - #sys.exit() + # sys.exit() else: pass return text_regions diff --git a/eynollah/eynollah/utils/pil_cv2.py b/eynollah/eynollah/utils/pil_cv2.py index 83ae47d..34ef9e1 100644 --- a/eynollah/eynollah/utils/pil_cv2.py +++ b/eynollah/eynollah/utils/pil_cv2.py @@ -5,15 +5,18 @@ from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, 
COLOR_BGR2RGB, cvtColor, imread # from sbb_binarization + def cv2pil(img): return Image.fromarray(np.array(cvtColor(img, COLOR_BGR2RGB))) + def pil2cv(img): # from ocrd/workspace.py - color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR + color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img) return cvtColor(pil_as_np_array, color_conversion) + def check_dpi(img): try: if isinstance(img, Image.Image): diff --git a/eynollah/eynollah/utils/resize.py b/eynollah/eynollah/utils/resize.py index fdc49ec..8c09b04 100644 --- a/eynollah/eynollah/utils/resize.py +++ b/eynollah/eynollah/utils/resize.py @@ -1,4 +1,5 @@ import cv2 + def resize_image(img_in, input_height, input_width): return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) diff --git a/eynollah/eynollah/utils/rotate.py b/eynollah/eynollah/utils/rotate.py index 603c2d9..4b6fbb6 100644 --- a/eynollah/eynollah/utils/rotate.py +++ b/eynollah/eynollah/utils/rotate.py @@ -3,6 +3,7 @@ import math import imutils import cv2 + def rotatedRectWithMaxArea(w, h, angle): if w <= 0 or h <= 0: return 0, 0 @@ -25,6 +26,7 @@ def rotatedRectWithMaxArea(w, h, angle): return wr, hr + def rotate_max_area_new(image, rotated, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) h, w, _ = rotated.shape @@ -34,17 +36,20 @@ def rotate_max_area_new(image, rotated, angle): x2 = x1 + int(wr) return rotated[y1:y2, x1:x2] + def rotation_image_new(img, thetha): rotated = imutils.rotate(img, thetha) return rotate_max_area_new(img, rotated, thetha) + def rotate_image(img_patch, slope): (h, w) = img_patch.shape[:2] center = (w // 2, h // 2) M = cv2.getRotationMatrix2D(center, slope, 1.0) return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) -def rotate_image_different( img, slope): + +def rotate_image_different(img, slope): # img = cv2.imread('images/input.jpg') num_rows, num_cols = img.shape[:2] @@ -52,6 +57,7 @@ def rotate_image_different( img, slope): img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows)) return img_rotation + def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_table_prediction, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) h, w, _ = rotated.shape @@ -61,6 +67,7 @@ def rotate_max_area(image, rotated, rotated_textline, rotated_layout, rotated_ta x2 = x1 + int(wr) return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_table_prediction[y1:y2, x1:x2] + def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thetha): rotated = imutils.rotate(img, thetha) rotated_textline = imutils.rotate(textline, thetha) @@ -68,6 +75,7 @@ def rotation_not_90_func(img, textline, text_regions_p_1, table_prediction, thet rotated_table_prediction = imutils.rotate(table_prediction, thetha) return rotate_max_area(img, rotated, rotated_textline, rotated_layout, rotated_table_prediction, thetha) + def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha): rotated = imutils.rotate(img, thetha) rotated_textline = imutils.rotate(textline, thetha) @@ -75,6 +83,7 @@ def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regio rotated_layout_full = imutils.rotate(text_regions_p_fully, thetha) return 
rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha) + def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle): wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle)) h, w, _ = rotated.shape @@ -83,4 +92,3 @@ def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout x1 = w // 2 - int(wr / 2) x2 = x1 + int(wr) return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_layout_full[y1:y2, x1:x2] - diff --git a/eynollah/eynollah/utils/xml.py b/eynollah/eynollah/utils/xml.py index 0386b25..009edd2 100644 --- a/eynollah/eynollah/utils/xml.py +++ b/eynollah/eynollah/utils/xml.py @@ -29,6 +29,7 @@ from ocrd_models.ocrd_page import ( to_xml) + def create_page_xml(imageFilename, height, width): now = datetime.now() pcgts = PcGtsType( @@ -46,6 +47,7 @@ def create_page_xml(imageFilename, height, width): )) return pcgts + def xml_reading_order(page, order_of_texts, id_of_marginalia): region_order = ReadingOrderType() og = OrderedGroupType(id="ro357564684568544579089") @@ -59,6 +61,7 @@ def xml_reading_order(page, order_of_texts, id_of_marginalia): og.add_RegionRefIndexed(RegionRefIndexedType(index=str(region_counter.get('region')), regionRef=id_marginal)) region_counter.inc('region') + def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): indexes_sorted = np.array(indexes_sorted) index_of_types = np.array(index_of_types) diff --git a/eynollah/eynollah/writer.py b/eynollah/eynollah/writer.py index f537f65..ee81a6a 100644 --- a/eynollah/eynollah/writer.py +++ b/eynollah/eynollah/writer.py @@ -8,21 +8,22 @@ from .utils.counter import EynollahIdCounter from ocrd_utils import getLogger from ocrd_models.ocrd_page import ( - BorderType, - CoordsType, - PcGtsType, - TextLineType, - TextRegionType, - ImageRegionType, - TableRegionType, - SeparatorRegionType, - to_xml - ) + BorderType, + CoordsType, + PcGtsType, + TextLineType, + TextRegionType, + ImageRegionType, + TableRegionType, + SeparatorRegionType, + to_xml +) import numpy as np + class EynollahXmlWriter(): - def __init__(self, *, dir_out, image_filename, curved_line,textline_light, pcgts=None): + def __init__(self, *, dir_out, image_filename, curved_line, textline_light, pcgts=None): self.logger = getLogger('eynollah.writer') self.counter = EynollahIdCounter() self.dir_out = dir_out @@ -30,10 +31,10 @@ class EynollahXmlWriter(): self.curved_line = curved_line self.textline_light = textline_light self.pcgts = pcgts - self.scale_x = None # XXX set outside __init__ - self.scale_y = None # XXX set outside __init__ - self.height_org = None # XXX set outside __init__ - self.width_org = None # XXX set outside __init__ + self.scale_x = None # XXX set outside __init__ + self.scale_y = None # XXX set outside __init__ + self.height_org = None # XXX set outside __init__ + self.width_org = None # XXX set outside __init__ @property def image_filename_stem(self): @@ -50,11 +51,12 @@ class EynollahXmlWriter(): else: points_page_print += str(int((contour[0][0]) / self.scale_x)) points_page_print += ',' - points_page_print += str(int((contour[0][1] ) / self.scale_y)) + points_page_print += str(int((contour[0][1]) / self.scale_y)) points_page_print = points_page_print + ' ' return points_page_print[:-1] - def serialize_lines_in_marginal(self, marginal_region, 
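# [editorial sketch, not part of the patch] The xml.py hunk above only
# separates the PAGE-XML helpers with blank lines. Their intended use,
# mirroring tests/test_xml.py further down: build an empty PcGts document
# for an image and serialize it.
from eynollah.eynollah.utils.xml import create_page_xml
from ocrd_models.ocrd_page import to_xml

pcgts = create_page_xml('/path/to/img.tif', 100, 100)  # filename, height, width
xmlstr = to_xml(pcgts)  # PAGE 2019-07-15 namespace, with a Metadata element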
all_found_textline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, counter): + def serialize_lines_in_marginal(self, marginal_region, all_found_textline_polygons_marginals, marginal_idx, + page_coord, all_box_coord_marginals, slopes_marginals, counter): for j in range(len(all_found_textline_polygons_marginals[marginal_idx])): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) @@ -63,37 +65,54 @@ class EynollahXmlWriter(): for l in range(len(all_found_textline_polygons_marginals[marginal_idx][j])): if not (self.curved_line or self.textline_light): if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) - textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) + textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + + all_box_coord_marginals[marginal_idx][2] + page_coord[ + 2]) / self.scale_x)) + textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + + all_box_coord_marginals[marginal_idx][0] + page_coord[ + 0]) / self.scale_y)) else: - textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x) ) - textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y) ) + textline_x_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + + all_box_coord_marginals[marginal_idx][2] + page_coord[ + 2]) / self.scale_x)) + textline_y_coord = max(0, int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + + all_box_coord_marginals[marginal_idx][0] + page_coord[ + 0]) / self.scale_y)) points_co += str(textline_x_coord) points_co += ',' points_co += str(textline_y_coord) if (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) <= 45: if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + page_coord[ + 2]) / self.scale_x)) points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + page_coord[ + 0]) / self.scale_y)) else: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + + page_coord[0]) / self.scale_y)) elif (self.curved_line or self.textline_light) and np.abs(slopes_marginals[marginal_idx]) > 45: if len(all_found_textline_polygons_marginals[marginal_idx][j][l]) == 2: - 
points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0] + + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][1] + + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) else: - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][0] + + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) + points_co += str(int((all_found_textline_polygons_marginals[marginal_idx][j][l][0][1] + + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y)) points_co += ' ' coords.set_points(points_co[:-1]) - def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter): + def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, + slopes, counter): self.logger.debug('enter serialize_lines_in_region') for j in range(len(all_found_textline_polygons[region_idx])): coords = CoordsType() @@ -104,11 +123,15 @@ class EynollahXmlWriter(): for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[region_idx][j]): if not (self.curved_line or self.textline_light): if len(contour_textline) == 2: - textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) - textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) + textline_x_coord = max(0, int((contour_textline[0] + region_bboxes[2] + page_coord[ + 2]) / self.scale_x)) + textline_y_coord = max(0, int((contour_textline[1] + region_bboxes[0] + page_coord[ + 0]) / self.scale_y)) else: - textline_x_coord = max(0, int((contour_textline[0][0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) - textline_y_coord = max(0, int((contour_textline[0][1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) + textline_x_coord = max(0, int((contour_textline[0][0] + region_bboxes[2] + page_coord[ + 2]) / self.scale_x)) + textline_y_coord = max(0, int((contour_textline[0][1] + region_bboxes[0] + page_coord[ + 0]) / self.scale_y)) points_co += str(textline_x_coord) points_co += ',' points_co += str(textline_y_coord) @@ -121,16 +144,18 @@ class EynollahXmlWriter(): else: points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y)) + points_co += str(int((contour_textline[0][1] + page_coord[0]) / self.scale_y)) elif (self.curved_line or self.textline_light) and np.abs(slopes[region_idx]) > 45: - if len(contour_textline)==2: - points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2])/self.scale_x)) + if 
len(contour_textline) == 2: + points_co += str(int((contour_textline[0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((contour_textline[1] + region_bboxes[0] + page_coord[0])/self.scale_y)) + points_co += str(int((contour_textline[1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) else: - points_co += str(int((contour_textline[0][0] + region_bboxes[2]+page_coord[2])/self.scale_x)) + points_co += str( + int((contour_textline[0][0] + region_bboxes[2] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y)) + points_co += str( + int((contour_textline[0][1] + region_bboxes[0] + page_coord[0]) / self.scale_y)) points_co += ' ' coords.set_points(points_co[:-1]) @@ -140,7 +165,11 @@ class EynollahXmlWriter(): with open(out_fname, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, + all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, + found_polygons_marginals, all_found_textline_polygons_marginals, + all_box_coord_marginals, slopes, slopes_marginals, cont_page, + polygons_lines_to_be_written_in_xml, found_polygons_tables): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -156,36 +185,42 @@ class EynollahXmlWriter(): for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)), - ) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_text_region[mm], + page_coord)), + ) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter) + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, + slopes, counter) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_marginals[mm], + page_coord))) page.add_TextRegion(marginal) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, + all_box_coord_marginals, slopes_marginals, counter) for mm in range(len(found_polygons_text_region_img)): img_region = ImageRegionType(id=counter.next_region_id, Coords=CoordsType()) page.add_ImageRegion(img_region) points_co = '' for lmm in range(len(found_polygons_text_region_img[mm])): - points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) + points_co += str(int((found_polygons_text_region_img[mm][lmm, 0, 0] + page_coord[2]) / self.scale_x)) points_co += ',' - 
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) + points_co += str(int((found_polygons_text_region_img[mm][lmm, 0, 1] + page_coord[0]) / self.scale_y)) points_co += ' ' img_region.get_Coords().set_points(points_co[:-1]) - + for mm in range(len(polygons_lines_to_be_written_in_xml)): sep_hor = SeparatorRegionType(id=counter.next_region_id, Coords=CoordsType()) page.add_SeparatorRegion(sep_hor) points_co = '' for lmm in range(len(polygons_lines_to_be_written_in_xml[mm])): - points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,0] ) / self.scale_x)) + points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm, 0, 0]) / self.scale_x)) points_co += ',' - points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm,0,1] ) / self.scale_y)) + points_co += str(int((polygons_lines_to_be_written_in_xml[mm][lmm, 0, 1]) / self.scale_y)) points_co += ' ' sep_hor.get_Coords().set_points(points_co[:-1]) for mm in range(len(found_polygons_tables)): @@ -193,15 +228,21 @@ class EynollahXmlWriter(): page.add_TableRegion(tab_region) points_co = '' for lmm in range(len(found_polygons_tables[mm])): - points_co += str(int((found_polygons_tables[mm][lmm,0,0] + page_coord[2]) / self.scale_x)) + points_co += str(int((found_polygons_tables[mm][lmm, 0, 0] + page_coord[2]) / self.scale_x)) points_co += ',' - points_co += str(int((found_polygons_tables[mm][lmm,0,1] + page_coord[0]) / self.scale_y)) + points_co += str(int((found_polygons_tables[mm][lmm, 0, 1] + page_coord[0]) / self.scale_y)) points_co += ' ' tab_region.get_Coords().set_points(points_co[:-1]) return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, + order_of_texts, id_of_texts, all_found_textline_polygons, + all_found_textline_polygons_h, all_box_coord, all_box_coord_h, + found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, + found_polygons_marginals, all_found_textline_polygons_marginals, + all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, + polygons_lines_to_be_written_in_xml): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -216,35 +257,48 @@ class EynollahXmlWriter(): for mm in range(len(found_polygons_text_region)): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_text_region[mm], + page_coord))) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter) + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, + slopes, counter) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): textregion = 
TextRegionType(id=counter.next_region_id, type_='header', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], + page_coord))) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter) + self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, + slopes_h, counter) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) + Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_marginals[mm], + page_coord))) page.add_TextRegion(marginal) - self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) + self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, + all_box_coord_marginals, slopes_marginals, counter) for mm in range(len(found_polygons_drop_capitals)): page.add_TextRegion(TextRegionType(id=counter.next_region_id, type_='drop-capital', - Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))) + Coords=CoordsType(points=self.calculate_polygon_coords( + found_polygons_drop_capitals[mm], page_coord)))) for mm in range(len(found_polygons_text_region_img)): - page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) - + page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord)))) + for mm in range(len(polygons_lines_to_be_written_in_xml)): - page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0 , 0, 0, 0])))) - + page.add_SeparatorRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType( + points=self.calculate_polygon_coords(polygons_lines_to_be_written_in_xml[mm], [0, 0, 0, 0])))) + for mm in range(len(found_polygons_tables)): - page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) + page.add_TableRegion(TableRegionType(id=counter.next_region_id, Coords=CoordsType( + points=self.calculate_polygon_coords(found_polygons_tables[mm], page_coord)))) return pcgts @@ -260,6 +314,5 @@ class EynollahXmlWriter(): coords += str(int((value_bbox[0][0] + page_coord[2]) / self.scale_x)) coords += ',' coords += str(int((value_bbox[0][1] + page_coord[0]) / self.scale_y)) - coords=coords + ' ' + coords = coords + ' ' return coords[:-1] - diff --git a/tests/base.py b/tests/base.py index 9de35ef..841355d 100644 --- a/tests/base.py +++ b/tests/base.py @@ -10,12 +10,14 @@ from unittest import TestCase as VanillaTestCase, skip, main as unittests_main import pytest from ocrd_utils import disableLogging, initLogging + def main(fn=None): if fn: sys.exit(pytest.main([fn])) else: unittests_main() + class TestCase(VanillaTestCase): @classmethod @@ -26,6 +28,7 @@ class TestCase(VanillaTestCase): disableLogging() 
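# [editorial sketch, not part of the patch] The writer.py reflow above changes
# no behaviour: every polygon point is shifted by the page offset and divided
# by scale_x/scale_y, then joined into a PAGE "points" string. That pattern in
# isolation, assuming OpenCV-style (N, 1, 2) contours (helper name is
# illustrative, not eynollah's own):
def polygon_to_points(contour, page_coord, scale_x, scale_y):
    pts = []
    for value_bbox in contour:
        x = int((value_bbox[0][0] + page_coord[2]) / scale_x)
        y = int((value_bbox[0][1] + page_coord[0]) / scale_y)
        pts.append('%d,%d' % (x, y))
    return ' '.join(pts)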
initLogging() + class CapturingTestCase(TestCase): """ A TestCase that needs to capture stderr/stdout and invoke click CLI. @@ -42,7 +45,7 @@ class CapturingTestCase(TestCase): """ self.capture_out_err() # XXX snapshot just before executing the CLI code = 0 - sys.argv[1:] = args # XXX necessary because sys.argv reflects pytest args not cli args + sys.argv[1:] = args # XXX necessary because sys.argv reflects pytest args not cli args try: cli.main(args=args) except SystemExit as e: diff --git a/tests/test_counter.py b/tests/test_counter.py index 125b7c1..145fbf6 100644 --- a/tests/test_counter.py +++ b/tests/test_counter.py @@ -1,6 +1,7 @@ from tests.base import main from eynollah.eynollah.utils.counter import EynollahIdCounter + def test_counter_string(): c = EynollahIdCounter() assert c.next_region_id == 'region_0001' @@ -11,6 +12,7 @@ def test_counter_string(): assert c.region_id(999) == 'region_0999' assert c.line_id(999, 888) == 'region_0999_line_0888' + def test_counter_init(): c = EynollahIdCounter(region_idx=2) assert c.get('region') == 2 @@ -19,6 +21,7 @@ def test_counter_init(): c.reset() assert c.get('region') == 2 + def test_counter_methods(): c = EynollahIdCounter() assert c.get('region') == 0 @@ -29,5 +32,6 @@ def test_counter_methods(): c.inc('region', -9) assert c.get('region') == 1 + if __name__ == '__main__': main(__file__) diff --git a/tests/test_dpi.py b/tests/test_dpi.py index 6317cd4..7f542e5 100644 --- a/tests/test_dpi.py +++ b/tests/test_dpi.py @@ -3,9 +3,11 @@ from pathlib import Path from eynollah.eynollah.utils.pil_cv2 import check_dpi from tests.base import main + def test_dpi(): fpath = str(Path(__file__).parent.joinpath('resources', 'kant_aufklaerung_1784_0020.tif')) assert 230 == check_dpi(cv2.imread(fpath)) + if __name__ == '__main__': main(__file__) diff --git a/tests/test_run.py b/tests/test_run.py index eeee964..f93de5e 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -8,6 +8,7 @@ testdir = Path(__file__).parent.resolve() EYNOLLAH_MODELS = environ.get('EYNOLLAH_MODELS', str(testdir.joinpath('..', 'models_eynollah').resolve())) + class TestEynollahRun(TestCase): def test_full_run(self): @@ -20,5 +21,6 @@ class TestEynollahRun(TestCase): print(code, out, err) assert not code + if __name__ == '__main__': main(__file__) diff --git a/tests/test_xml.py b/tests/test_xml.py index c8bac53..2fddd77 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -4,11 +4,13 @@ from ocrd_models.ocrd_page import to_xml PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15' + def test_create_xml(): pcgts = create_page_xml('/path/to/img.tif', 100, 100) xmlstr = to_xml(pcgts) assert 'xmlns:pc="%s"' % PAGE_2019 in xmlstr assert 'Metadata' in xmlstr + if __name__ == '__main__': main([__file__]) From 0720091a4878a22f549552f603e3048f17cdf47c Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 19 Jan 2024 16:17:02 +0000 Subject: [PATCH 08/34] adapt to ocrd>=2.54 url vs local_filename --- eynollah/eynollah/processor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eynollah/eynollah/processor.py b/eynollah/eynollah/processor.py index 042d081..8f34b1b 100644 --- a/eynollah/eynollah/processor.py +++ b/eynollah/eynollah/processor.py @@ -43,7 +43,11 @@ class EynollahProcessor(Processor): page = pcgts.get_Page() # XXX loses DPI information # page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') - image_filename = 
self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename + if not('://' in page.imageFilename): + image_filename = next(self.workspace.mets.find_files(local_filename=page.imageFilename)).local_filename + else: + # could be a URL with file:// or truly remote + image_filename = self.workspace.download_file(next(self.workspace.mets.find_files(url=page.imageFilename))).local_filename eynollah_kwargs = { 'dir_models': self.resolve_resource(self.parameter['models']), 'allow_enhancement': False, From ccd35b426b9f1bd89d6c7223aee34157c76ab7f7 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Wed, 24 Jan 2024 19:33:49 +0100 Subject: [PATCH 09/34] adapt to OcrdFile.local_filename now :Path --- eynollah/eynollah/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eynollah/eynollah/processor.py b/eynollah/eynollah/processor.py index 8f34b1b..a03c9c3 100644 --- a/eynollah/eynollah/processor.py +++ b/eynollah/eynollah/processor.py @@ -59,7 +59,7 @@ class EynollahProcessor(Processor): 'override_dpi': self.parameter['dpi'], 'logger': LOG, 'pcgts': pcgts, - 'image_filename': image_filename + 'image_filename': str(image_filename) } Eynollah(**eynollah_kwargs).run() file_id = make_file_id(input_file, self.output_file_grp) From 604a908ec8968946078fb02e435989d5b943a50e Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 23 Feb 2024 23:33:54 +0100 Subject: [PATCH 10/34] Update processor.py --- eynollah/eynollah/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eynollah/eynollah/processor.py b/eynollah/eynollah/processor.py index a03c9c3..9207f54 100644 --- a/eynollah/eynollah/processor.py +++ b/eynollah/eynollah/processor.py @@ -43,7 +43,7 @@ class EynollahProcessor(Processor): page = pcgts.get_Page() # XXX loses DPI information # page_image, _, _ = self.workspace.image_from_page(page, page_id, feature_filter='binarized') - if not('://' in page.imageFilename): + if not ('://' in page.imageFilename): image_filename = next(self.workspace.mets.find_files(local_filename=page.imageFilename)).local_filename else: # could be a URL with file:// or truly remote From 3f659348359c10cd804cfde1974b5be1672d6ad6 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Sat, 24 Feb 2024 00:14:18 +0100 Subject: [PATCH 11/34] partial revert of https://github.com/qurator-spk/eynollah/commit/c606391c312eceab9aa3ebff071bdf12a30b45cc --- eynollah/eynollah/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/eynollah/eynollah/cli.py b/eynollah/eynollah/cli.py index 94bf211..fe7f878 100644 --- a/eynollah/eynollah/cli.py +++ b/eynollah/eynollah/cli.py @@ -10,6 +10,7 @@ from eynollah.eynollah.eynollah import Eynollah "-i", help="image filename", type=click.Path(exists=True, dir_okay=False), + required=True, ) @click.option( "--out", @@ -198,9 +199,9 @@ def main( light_version=light_version, ignore_page_extraction=ignore_page_extraction, ) - eynollah.run() - # pcgts = eynollah.run() - # eynollah.writer.write_pagexml(pcgts) + # eynollah.run() + pcgts = eynollah.run() + eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": From 47ee128c5abd6411467a031de4cd25ac75b1adc3 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Sat, 24 Feb 2024 01:17:23 +0100 Subject: [PATCH 12/34] undo https://github.com/qurator-spk/eynollah/commit/3f659348359c10cd804cfde1974b5be1672d6ad6 
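(Editorial note: the two adjacent commits, PATCH 11 and PATCH 12, toggle a single
click constraint on the `-i` option. A minimal, self-contained sketch of that
option in isolation -- illustrative only, the long parameter name is assumed and
this is not the full eynollah CLI:)

    import click

    @click.command()
    @click.option(
        "-i", "image",  # short flag as in the diff; the parameter name is illustrative
        help="image filename",
        type=click.Path(exists=True, dir_okay=False),
        required=True,  # added by PATCH 11, commented out again by PATCH 12
    )
    def main(image):
        # with required=True, click aborts early when -i is missing; without it,
        # the value may be None and has to be validated further down the stack
        click.echo(image)

    if __name__ == "__main__":
        main()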
--- eynollah/eynollah/cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/eynollah/eynollah/cli.py b/eynollah/eynollah/cli.py index fe7f878..9cc9e49 100644 --- a/eynollah/eynollah/cli.py +++ b/eynollah/eynollah/cli.py @@ -10,7 +10,7 @@ from eynollah.eynollah.eynollah import Eynollah "-i", help="image filename", type=click.Path(exists=True, dir_okay=False), - required=True, + # required=True, ) @click.option( "--out", @@ -199,9 +199,9 @@ def main( light_version=light_version, ignore_page_extraction=ignore_page_extraction, ) - # eynollah.run() - pcgts = eynollah.run() - eynollah.writer.write_pagexml(pcgts) + eynollah.run() + # pcgts = eynollah.run() + # eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": From 702656cf779f28861c805bb801a198e97485bf05 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Sat, 24 Feb 2024 01:18:24 +0100 Subject: [PATCH 13/34] more code formatting --- eynollah/eynollah/train/__init__.py | 1 + eynollah/eynollah/train/config_params.json | 6 +- eynollah/eynollah/utils/__init__.py | 2574 ++++++++++---------- 3 files changed, 1283 insertions(+), 1298 deletions(-) create mode 100644 eynollah/eynollah/train/__init__.py diff --git a/eynollah/eynollah/train/__init__.py b/eynollah/eynollah/train/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/eynollah/eynollah/train/__init__.py @@ -0,0 +1 @@ + diff --git a/eynollah/eynollah/train/config_params.json b/eynollah/eynollah/train/config_params.json index eaa50e1..b07d28d 100644 --- a/eynollah/eynollah/train/config_params.json +++ b/eynollah/eynollah/train/config_params.json @@ -24,7 +24,7 @@ "weighted_loss": false, "is_loss_soft_dice": false, "data_is_provided": false, - "dir_train": "/home/vahid/Documents/handwrittens_train/train", - "dir_eval": "/home/vahid/Documents/handwrittens_train/eval", - "dir_output": "/home/vahid/Documents/handwrittens_train/output" + "dir_train": "/path/to/training/files/train", + "dir_eval": "/path/to/training/files/eval", + "dir_output": "/path/to/training/files/output" } diff --git a/eynollah/eynollah/utils/__init__.py b/eynollah/eynollah/utils/__init__.py index d2b2488..abc2e1d 100644 --- a/eynollah/eynollah/utils/__init__.py +++ b/eynollah/eynollah/utils/__init__.py @@ -14,291 +14,282 @@ from .contour import (contours_in_same_horizon, return_contours_of_image, return_parent_contours) -def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some,x_max_hor_some,cy_hor_some,peak_points,cy_hor_diff): - - - x_start=[] - x_end=[] - kind=[]#if covers 2 and more than 2 columns set it to 1 otherwise 0 - len_sep=[] - y_sep=[] - y_diff=[] - new_main_sep_y=[] - - indexer=0 + +def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some, x_max_hor_some, cy_hor_some, + peak_points, cy_hor_diff): + x_start = [] + x_end = [] + kind = [] # if covers 2 and more than 2 columns set it to 1 otherwise 0 + len_sep = [] + y_sep = [] + y_diff = [] + new_main_sep_y = [] + + indexer = 0 for i in range(len(x_min_hor_some)): - starting=x_min_hor_some[i]-peak_points - starting=starting[starting>=0] - min_start=np.argmin(starting) - - - ending=peak_points-x_max_hor_some[i] - len_ending_neg=len(ending[ending<=0]) - - ending=ending[ending>0] - max_end=np.argmin(ending)+len_ending_neg - - - if (max_end-min_start)>=2: - if (max_end-min_start)==(len(peak_points)-1): + starting = x_min_hor_some[i] - peak_points + starting = starting[starting >= 0] + min_start = np.argmin(starting) + + ending = 
peak_points - x_max_hor_some[i] + len_ending_neg = len(ending[ending <= 0]) + + ending = ending[ending > 0] + max_end = np.argmin(ending) + len_ending_neg + + if (max_end - min_start) >= 2: + if (max_end - min_start) == (len(peak_points) - 1): new_main_sep_y.append(indexer) - - #print((max_end-min_start),len(peak_points),'(max_end-min_start)') + + # print((max_end-min_start),len(peak_points),'(max_end-min_start)') y_sep.append(cy_hor_some[i]) y_diff.append(cy_hor_diff[i]) x_end.append(max_end) - - x_start.append( min_start) - - len_sep.append(max_end-min_start) - if max_end==min_start+1: + + x_start.append(min_start) + + len_sep.append(max_end - min_start) + if max_end == min_start + 1: kind.append(0) else: kind.append(1) - - indexer+=1 - - - x_start_returned=np.copy(x_start) - x_end_returned=np.copy(x_end) - y_sep_returned=np.copy(y_sep) - y_diff_returned=np.copy(y_diff) - - - - - all_args_uniq=contours_in_same_horizon(y_sep_returned) - - args_to_be_unified=[] - y_unified=[] - y_diff_unified=[] - x_s_unified=[] - x_e_unified=[] - if len(all_args_uniq)>0: - #print('burda') + + indexer += 1 + + x_start_returned = np.copy(x_start) + x_end_returned = np.copy(x_end) + y_sep_returned = np.copy(y_sep) + y_diff_returned = np.copy(y_diff) + + all_args_uniq = contours_in_same_horizon(y_sep_returned) + + args_to_be_unified = [] + y_unified = [] + y_diff_unified = [] + x_s_unified = [] + x_e_unified = [] + if len(all_args_uniq) > 0: + # print('burda') if type(all_args_uniq[0]) is list: for dd in range(len(all_args_uniq)): - if len(all_args_uniq[dd])==2: - x_s_same_hor=np.array(x_start_returned)[all_args_uniq[dd]] - x_e_same_hor=np.array(x_end_returned)[all_args_uniq[dd]] - y_sep_same_hor=np.array(y_sep_returned)[all_args_uniq[dd]] - y_diff_same_hor=np.array(y_diff_returned)[all_args_uniq[dd]] - #print('burda2') - if x_s_same_hor[0]==(x_e_same_hor[1]-1) or x_s_same_hor[1]==(x_e_same_hor[0]-1) and x_s_same_hor[0]!=x_s_same_hor[1] and x_e_same_hor[0]!=x_e_same_hor[1]: - #print('burda3') + if len(all_args_uniq[dd]) == 2: + x_s_same_hor = np.array(x_start_returned)[all_args_uniq[dd]] + x_e_same_hor = np.array(x_end_returned)[all_args_uniq[dd]] + y_sep_same_hor = np.array(y_sep_returned)[all_args_uniq[dd]] + y_diff_same_hor = np.array(y_diff_returned)[all_args_uniq[dd]] + # print('burda2') + if x_s_same_hor[0] == (x_e_same_hor[1] - 1) or x_s_same_hor[1] == (x_e_same_hor[0] - 1) and \ + x_s_same_hor[0] != x_s_same_hor[1] and x_e_same_hor[0] != x_e_same_hor[1]: + # print('burda3') for arg_in in all_args_uniq[dd]: - #print(arg_in,'arg_in') + # print(arg_in,'arg_in') args_to_be_unified.append(arg_in) - y_selected=np.min(y_sep_same_hor) - y_diff_selected=np.max(y_diff_same_hor) - x_s_selected=np.min(x_s_same_hor) - x_e_selected=np.max(x_e_same_hor) - + y_selected = np.min(y_sep_same_hor) + y_diff_selected = np.max(y_diff_same_hor) + x_s_selected = np.min(x_s_same_hor) + x_e_selected = np.max(x_e_same_hor) + x_s_unified.append(x_s_selected) x_e_unified.append(x_e_selected) y_unified.append(y_selected) y_diff_unified.append(y_diff_selected) - - - - #print(x_s_same_hor,'x_s_same_hor') - #print(x_e_same_hor[:]-1,'x_e_same_hor') - #print('#############################') - - #print(x_s_unified,'y_selected') - #print(x_e_unified,'x_s_selected') - #print(y_unified,'x_e_same_hor') - - args_lines_not_unified=list( set(range(len(y_sep_returned)))-set(args_to_be_unified) ) - - #print(args_lines_not_unified,'args_lines_not_unified') - - x_start_returned_not_unified=list( np.array(x_start_returned)[args_lines_not_unified] ) 
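# Editorial aside, not part of the patch: a condensed, self-contained sketch of
# the unification step performed here. Separators detected on the same
# horizontal level are merged into a single span -- min of the starts, max of
# the ends (the real code additionally requires the two spans to abut and also
# merges their y coordinates). Plain integer column indices are assumed.
import numpy as np

def unify_spans(x_start, x_end, groups):
    merged = []
    for group in groups:
        xs = np.array(x_start)[group]
        xe = np.array(x_end)[group]
        merged.append((int(xs.min()), int(xe.max())))
    return merged

# two separators on one horizon, covering columns 0-1 and 1-3:
print(unify_spans([0, 1], [1, 3], [[0, 1]]))  # [(0, 3)]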
- x_end_returned_not_unified=list( np.array(x_end_returned)[args_lines_not_unified] ) - y_sep_returned_not_unified=list (np.array(y_sep_returned)[args_lines_not_unified] ) - y_diff_returned_not_unified=list (np.array(y_diff_returned)[args_lines_not_unified] ) - + + # print(x_s_same_hor,'x_s_same_hor') + # print(x_e_same_hor[:]-1,'x_e_same_hor') + # print('#############################') + + # print(x_s_unified,'y_selected') + # print(x_e_unified,'x_s_selected') + # print(y_unified,'x_e_same_hor') + + args_lines_not_unified = list(set(range(len(y_sep_returned))) - set(args_to_be_unified)) + + # print(args_lines_not_unified,'args_lines_not_unified') + + x_start_returned_not_unified = list(np.array(x_start_returned)[args_lines_not_unified]) + x_end_returned_not_unified = list(np.array(x_end_returned)[args_lines_not_unified]) + y_sep_returned_not_unified = list(np.array(y_sep_returned)[args_lines_not_unified]) + y_diff_returned_not_unified = list(np.array(y_diff_returned)[args_lines_not_unified]) + for dv in range(len(y_unified)): y_sep_returned_not_unified.append(y_unified[dv]) y_diff_returned_not_unified.append(y_diff_unified[dv]) x_start_returned_not_unified.append(x_s_unified[dv]) x_end_returned_not_unified.append(x_e_unified[dv]) - - #print(y_sep_returned,'y_sep_returned') - #print(x_start_returned,'x_start_returned') - #print(x_end_returned,'x_end_returned') - - x_start_returned=np.copy(x_start_returned_not_unified) - x_end_returned=np.copy(x_end_returned_not_unified) - y_sep_returned=np.copy(y_sep_returned_not_unified) - y_diff_returned=np.copy(y_diff_returned_not_unified) - - - #print(y_sep_returned,'y_sep_returned2') - #print(x_start_returned,'x_start_returned2') - #print(x_end_returned,'x_end_returned2') - #print(new_main_sep_y,'new_main_sep_y') - - #print(x_start,'x_start') - #print(x_end,'x_end') - if len(new_main_sep_y)>0: - - min_ys=np.min(y_sep) - max_ys=np.max(y_sep) - - y_mains=[] + + # print(y_sep_returned,'y_sep_returned') + # print(x_start_returned,'x_start_returned') + # print(x_end_returned,'x_end_returned') + + x_start_returned = np.copy(x_start_returned_not_unified) + x_end_returned = np.copy(x_end_returned_not_unified) + y_sep_returned = np.copy(y_sep_returned_not_unified) + y_diff_returned = np.copy(y_diff_returned_not_unified) + + # print(y_sep_returned,'y_sep_returned2') + # print(x_start_returned,'x_start_returned2') + # print(x_end_returned,'x_end_returned2') + # print(new_main_sep_y,'new_main_sep_y') + + # print(x_start,'x_start') + # print(x_end,'x_end') + if len(new_main_sep_y) > 0: + + min_ys = np.min(y_sep) + max_ys = np.max(y_sep) + + y_mains = [] y_mains.append(min_ys) - y_mains_sep_ohne_grenzen=[] - + y_mains_sep_ohne_grenzen = [] + for ii in range(len(new_main_sep_y)): y_mains.append(y_sep[new_main_sep_y[ii]]) y_mains_sep_ohne_grenzen.append(y_sep[new_main_sep_y[ii]]) - + y_mains.append(max_ys) - - y_mains_sorted=np.sort(y_mains) - diff=np.diff(y_mains_sorted) - argm=np.argmax(diff) - - y_min_new=y_mains_sorted[argm] - y_max_new=y_mains_sorted[argm+1] - - #print(y_min_new,'y_min_new') - #print(y_max_new,'y_max_new') - - - #print(y_sep[new_main_sep_y[0]],y_sep,'yseps') - x_start=np.array(x_start) - x_end=np.array(x_end) - kind=np.array(kind) - y_sep=np.array(y_sep) + + y_mains_sorted = np.sort(y_mains) + diff = np.diff(y_mains_sorted) + argm = np.argmax(diff) + + y_min_new = y_mains_sorted[argm] + y_max_new = y_mains_sorted[argm + 1] + + # print(y_min_new,'y_min_new') + # print(y_max_new,'y_max_new') + + # print(y_sep[new_main_sep_y[0]],y_sep,'yseps') + 
x_start = np.array(x_start) + x_end = np.array(x_end) + kind = np.array(kind) + y_sep = np.array(y_sep) if (y_min_new in y_mains_sep_ohne_grenzen) and (y_max_new in y_mains_sep_ohne_grenzen): - x_start=x_start[(y_sep>y_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sepy_min_new) & (y_sep y_min_new) & (y_sep < y_max_new)] + x_end = x_end[(y_sep > y_min_new) & (y_sep < y_max_new)] + kind = kind[(y_sep > y_min_new) & (y_sep < y_max_new)] + y_sep = y_sep[(y_sep > y_min_new) & (y_sep < y_max_new)] elif (y_min_new in y_mains_sep_ohne_grenzen) and (y_max_new not in y_mains_sep_ohne_grenzen): - #print('burda') - x_start=x_start[(y_sep>y_min_new) & (y_sep<=y_max_new)] - #print('burda1') - x_end=x_end[(y_sep>y_min_new) & (y_sep<=y_max_new)] - #print('burda2') - kind=kind[(y_sep>y_min_new) & (y_sep<=y_max_new)] - y_sep=y_sep[(y_sep>y_min_new) & (y_sep<=y_max_new)] + # print('burda') + x_start = x_start[(y_sep > y_min_new) & (y_sep <= y_max_new)] + # print('burda1') + x_end = x_end[(y_sep > y_min_new) & (y_sep <= y_max_new)] + # print('burda2') + kind = kind[(y_sep > y_min_new) & (y_sep <= y_max_new)] + y_sep = y_sep[(y_sep > y_min_new) & (y_sep <= y_max_new)] elif (y_min_new not in y_mains_sep_ohne_grenzen) and (y_max_new in y_mains_sep_ohne_grenzen): - x_start=x_start[(y_sep>=y_min_new) & (y_sep=y_min_new) & (y_sep=y_min_new) & (y_sep=y_min_new) & (y_sep= y_min_new) & (y_sep < y_max_new)] + x_end = x_end[(y_sep >= y_min_new) & (y_sep < y_max_new)] + kind = kind[(y_sep >= y_min_new) & (y_sep < y_max_new)] + y_sep = y_sep[(y_sep >= y_min_new) & (y_sep < y_max_new)] else: - x_start=x_start[(y_sep>=y_min_new) & (y_sep<=y_max_new)] - x_end=x_end[(y_sep>=y_min_new) & (y_sep<=y_max_new)] - kind=kind[(y_sep>=y_min_new) & (y_sep<=y_max_new)] - y_sep=y_sep[(y_sep>=y_min_new) & (y_sep<=y_max_new)] - #print(x_start,'x_start') - #print(x_end,'x_end') - #print(len_sep) - - - deleted=[] - for i in range(len(x_start)-1): - nodes_i=set(range(x_start[i],x_end[i]+1)) - for j in range(i+1,len(x_start)): - if nodes_i==set(range(x_start[j],x_end[j]+1)): - deleted.append(j) - #print(np.unique(deleted)) - - remained_sep_indexes=set(range(len(x_start)))-set(np.unique(deleted) ) - #print(remained_sep_indexes,'remained_sep_indexes') - mother=[]#if it has mother - child=[] + x_start = x_start[(y_sep >= y_min_new) & (y_sep <= y_max_new)] + x_end = x_end[(y_sep >= y_min_new) & (y_sep <= y_max_new)] + kind = kind[(y_sep >= y_min_new) & (y_sep <= y_max_new)] + y_sep = y_sep[(y_sep >= y_min_new) & (y_sep <= y_max_new)] + # print(x_start,'x_start') + # print(x_end,'x_end') + # print(len_sep) + + deleted = [] + for i in range(len(x_start) - 1): + nodes_i = set(range(x_start[i], x_end[i] + 1)) + for j in range(i + 1, len(x_start)): + if nodes_i == set(range(x_start[j], x_end[j] + 1)): + deleted.append(j) + # print(np.unique(deleted)) + + remained_sep_indexes = set(range(len(x_start))) - set(np.unique(deleted)) + # print(remained_sep_indexes,'remained_sep_indexes') + mother = [] # if it has mother + child = [] for index_i in remained_sep_indexes: - have_mother=0 - have_child=0 - nodes_ind=set(range(x_start[index_i],x_end[index_i]+1)) + have_mother = 0 + have_child = 0 + nodes_ind = set(range(x_start[index_i], x_end[index_i] + 1)) for index_j in remained_sep_indexes: - nodes_ind_j=set(range(x_start[index_j],x_end[index_j]+1)) - if nodes_indnodes_ind_j: - have_child=1 + nodes_ind_j = set(range(x_start[index_j], x_end[index_j] + 1)) + if nodes_ind < nodes_ind_j: + have_mother = 1 + if nodes_ind > nodes_ind_j: + have_child = 1 
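# Editorial aside, not part of the patch: the mother/child test above relies on
# Python's set ordering operators -- one separator is nested inside another
# ("has a mother") if the set of column indices it spans is a strict subset of
# the other's, and conversely "has a child" if it is a strict superset.
a = set(range(1, 3))  # separator covering columns 1-2
b = set(range(0, 5))  # separator covering columns 0-4
print(a < b)  # True: a is nested inside b, so a has a mother
print(b > a)  # True: b contains a, so b has a child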
mother.append(have_mother) child.append(have_child) - - #print(mother,'mother') - #print(len(remained_sep_indexes)) - #print(len(remained_sep_indexes),len(x_start),len(x_end),len(y_sep),'lens') - y_lines_without_mother=[] - x_start_without_mother=[] - x_end_without_mother=[] - - y_lines_with_child_without_mother=[] - x_start_with_child_without_mother=[] - x_end_with_child_without_mother=[] - - #print(mother,'mother') - #print(child,'child') - - if len(remained_sep_indexes)>1: - #print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)') - #print(np.array(mother),'mother') - remained_sep_indexes_without_mother=np.array(list(remained_sep_indexes))[np.array(mother)==0] - remained_sep_indexes_with_child_without_mother=np.array(list(remained_sep_indexes))[(np.array(mother)==0) & (np.array(child)==1)] - #print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') - - - - x_end_with_child_without_mother=np.array(x_end)[np.array(remained_sep_indexes_with_child_without_mother)] - - x_start_with_child_without_mother=np.array(x_start)[np.array(remained_sep_indexes_with_child_without_mother)] - - y_lines_with_child_without_mother=np.array(y_sep)[np.array(remained_sep_indexes_with_child_without_mother)] - - - reading_orther_type=0 - - - x_end_without_mother=np.array(x_end)[np.array(remained_sep_indexes_without_mother)] - x_start_without_mother=np.array(x_start)[np.array(remained_sep_indexes_without_mother)] - y_lines_without_mother=np.array(y_sep)[np.array(remained_sep_indexes_without_mother)] - - if len(remained_sep_indexes_without_mother)>=2: - for i in range(len(remained_sep_indexes_without_mother)-1): - ##nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]],x_end[remained_sep_indexes_without_mother[i]]+1)) - nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]],x_end[remained_sep_indexes_without_mother[i]])) - for j in range(i+1,len(remained_sep_indexes_without_mother)): - #nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]],x_end[remained_sep_indexes_without_mother[j]]+1)) - nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]],x_end[remained_sep_indexes_without_mother[j]])) - - set_diff=nodes_i-nodes_j - - if set_diff!=nodes_i: - reading_orther_type=1 + + # print(mother,'mother') + # print(len(remained_sep_indexes)) + # print(len(remained_sep_indexes),len(x_start),len(x_end),len(y_sep),'lens') + y_lines_without_mother = [] + x_start_without_mother = [] + x_end_without_mother = [] + + y_lines_with_child_without_mother = [] + x_start_with_child_without_mother = [] + x_end_with_child_without_mother = [] + + # print(mother,'mother') + # print(child,'child') + + if len(remained_sep_indexes) > 1: + # print(np.array(remained_sep_indexes),'np.array(remained_sep_indexes)') + # print(np.array(mother),'mother') + remained_sep_indexes_without_mother = np.array(list(remained_sep_indexes))[np.array(mother) == 0] + remained_sep_indexes_with_child_without_mother = np.array(list(remained_sep_indexes))[ + (np.array(mother) == 0) & (np.array(child) == 1)] + # print(remained_sep_indexes_without_mother,'remained_sep_indexes_without_mother') + + x_end_with_child_without_mother = np.array(x_end)[np.array(remained_sep_indexes_with_child_without_mother)] + + x_start_with_child_without_mother = np.array(x_start)[np.array(remained_sep_indexes_with_child_without_mother)] + + y_lines_with_child_without_mother = np.array(y_sep)[np.array(remained_sep_indexes_with_child_without_mother)] + + reading_orther_type = 0 + + x_end_without_mother = 
np.array(x_end)[np.array(remained_sep_indexes_without_mother)] + x_start_without_mother = np.array(x_start)[np.array(remained_sep_indexes_without_mother)] + y_lines_without_mother = np.array(y_sep)[np.array(remained_sep_indexes_without_mother)] + + if len(remained_sep_indexes_without_mother) >= 2: + for i in range(len(remained_sep_indexes_without_mother) - 1): + # nodes_i=set(range(x_start[remained_sep_indexes_without_mother[i]],x_end[remained_sep_indexes_without_mother[i]]+1)) + nodes_i = set(range(x_start[remained_sep_indexes_without_mother[i]], + x_end[remained_sep_indexes_without_mother[i]])) + for j in range(i + 1, len(remained_sep_indexes_without_mother)): + # nodes_j=set(range(x_start[remained_sep_indexes_without_mother[j]],x_end[remained_sep_indexes_without_mother[j]]+1)) + nodes_j = set(range(x_start[remained_sep_indexes_without_mother[j]], + x_end[remained_sep_indexes_without_mother[j]])) + + set_diff = nodes_i - nodes_j + + if set_diff != nodes_i: + reading_orther_type = 1 else: - reading_orther_type=0 - #print(reading_orther_type,'javab') - - #print(y_lines_with_child_without_mother,'y_lines_with_child_without_mother') - #print(x_start_with_child_without_mother,'x_start_with_child_without_mother') - #print(x_end_with_child_without_mother,'x_end_with_hild_without_mother') - - len_sep_with_child=len(np.array(child)[np.array(child)==1]) - - #print(len_sep_with_child,'len_sep_with_child') - there_is_sep_with_child=0 - - if len_sep_with_child>=1: - there_is_sep_with_child=1 - - #print(all_args_uniq,'all_args_uniq') - #print(args_to_be_unified,'args_to_be_unified') - - - return reading_orther_type,x_start_returned, x_end_returned ,y_sep_returned,y_diff_returned,y_lines_without_mother,x_start_without_mother,x_end_without_mother,there_is_sep_with_child,y_lines_with_child_without_mother,x_start_with_child_without_mother,x_end_with_child_without_mother,new_main_sep_y + reading_orther_type = 0 + # print(reading_orther_type,'javab') + + # print(y_lines_with_child_without_mother,'y_lines_with_child_without_mother') + # print(x_start_with_child_without_mother,'x_start_with_child_without_mother') + # print(x_end_with_child_without_mother,'x_end_with_hild_without_mother') + + len_sep_with_child = len(np.array(child)[np.array(child) == 1]) + + # print(len_sep_with_child,'len_sep_with_child') + there_is_sep_with_child = 0 + + if len_sep_with_child >= 1: + there_is_sep_with_child = 1 + + # print(all_args_uniq,'all_args_uniq') + # print(args_to_be_unified,'args_to_be_unified') + + return reading_orther_type, x_start_returned, x_end_returned, y_sep_returned, y_diff_returned, y_lines_without_mother, x_start_without_mother, x_end_without_mother, there_is_sep_with_child, y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, new_main_sep_y + + def crop_image_inside_box(box, img_org_copy): - image_box = img_org_copy[box[1] : box[1] + box[3], box[0] : box[0] + box[2]] + image_box = img_org_copy[box[1]: box[1] + box[3], box[0]: box[0] + box[2]] return image_box, [box[1], box[1] + box[3], box[0], box[0] + box[2]] + def otsu_copy_binary(img): img_r = np.zeros((img.shape[0], img.shape[1], 3)) img1 = img[:, :, 0] @@ -314,7 +305,6 @@ def otsu_copy_binary(img): def find_features_of_lines(contours_main): - areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))] cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] @@ 
-339,7 +329,9 @@ def find_features_of_lines(contours_main): slope_lines[(slope_lines != 0) & (slope_lines != 1)] = 2 dis_x = np.abs(x_max_main - x_min_main) - return slope_lines, dis_x, x_min_main, x_max_main, np.array(cy_main), np.array(slope_lines_org), y_min_main, y_max_main, np.array(cx_main) + return slope_lines, dis_x, x_min_main, x_max_main, np.array(cy_main), np.array( + slope_lines_org), y_min_main, y_max_main, np.array(cx_main) + def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregion_pre_np, img_only_text): textregion_pre_p_org = np.copy(textregion_pre_p) @@ -350,32 +342,34 @@ def boosting_headers_by_longshot_region_segmentation(textregion_pre_p, textregio textregion_pre_p[:, :, 0][textregion_pre_p[:, :, 0] == 1] = 0 # earlier it was so, but by this manner the drop capitals are also deleted # textregion_pre_p[:,:,0][( img_only_text[:,:]==1) & (textregion_pre_p[:,:,0]!=7) & (textregion_pre_p[:,:,0]!=2)]=1 - textregion_pre_p[:, :, 0][(img_only_text[:, :] == 1) & (textregion_pre_p[:, :, 0] != 7) & (textregion_pre_p[:, :, 0] != 4) & (textregion_pre_p[:, :, 0] != 2)] = 1 + textregion_pre_p[:, :, 0][ + (img_only_text[:, :] == 1) & (textregion_pre_p[:, :, 0] != 7) & (textregion_pre_p[:, :, 0] != 4) & ( + textregion_pre_p[:, :, 0] != 2)] = 1 return textregion_pre_p def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8): - regions_without_separators_0 = regions_without_separators[:,:].sum(axis=1) + regions_without_separators_0 = regions_without_separators[:, :].sum(axis=1) z = gaussian_filter1d(regions_without_separators_0, sigma_) return np.std(z) def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8): regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0) - ##plt.plot(regions_without_separators_0) - ##plt.show() + # plt.plot(regions_without_separators_0) + # plt.show() sigma_ = 35 # 70#35 - meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1] + meda_n_updown = regions_without_separators_0[len(regions_without_separators_0):: -1] first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0) last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0) last_nonzero = len(regions_without_separators_0) - last_nonzero y = regions_without_separators_0 # [first_nonzero:last_nonzero] y_help = np.zeros(len(y) + 20) - y_help[10 : len(y) + 10] = y + y_help[10: len(y) + 10] = y x = np.array(range(len(y))) zneg_rev = -y_help + np.max(y_help) zneg = np.zeros(len(zneg_rev) + 20) - zneg[10 : len(zneg_rev) + 10] = zneg_rev + zneg[10: len(zneg_rev) + 10] = zneg_rev z = gaussian_filter1d(y, sigma_) zneg = gaussian_filter1d(zneg, sigma_) @@ -387,7 +381,8 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl first_nonzero = first_nonzero + 200 peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)] - peaks = peaks[(peaks > 0.06 * regions_without_separators.shape[1]) & (peaks < 0.94 * regions_without_separators.shape[1])] + peaks = peaks[ + (peaks > 0.06 * regions_without_separators.shape[1]) & (peaks < 0.94 * regions_without_separators.shape[1])] peaks_neg = peaks_neg[(peaks_neg > 370) & (peaks_neg < (regions_without_separators.shape[1] - 370))] interest_pos = z[peaks] interest_pos = interest_pos[interest_pos > 10] @@ -416,16 +411,16 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl interest_neg_fin = interest_neg[(interest_neg < grenze)] peaks_neg_fin = 
peaks_neg[(interest_neg < grenze)] # interest_neg_fin=interest_neg[(interest_neg= 3: - index_sort_interest_neg_fin= np.argsort(interest_neg_fin) + if (num_col_classifier - ((len(interest_neg_fin)) + 1)) >= 3: + index_sort_interest_neg_fin = np.argsort(interest_neg_fin) peaks_neg_sorted = np.array(peaks_neg)[index_sort_interest_neg_fin] interest_neg_fin_sorted = np.array(interest_neg_fin)[index_sort_interest_neg_fin] - - if len(index_sort_interest_neg_fin)>=num_col_classifier: - peaks_neg_fin = list( peaks_neg_sorted[:num_col_classifier] ) - interest_neg_fin = list( interest_neg_fin_sorted[:num_col_classifier] ) + + if len(index_sort_interest_neg_fin) >= num_col_classifier: + peaks_neg_fin = list(peaks_neg_sorted[:num_col_classifier]) + interest_neg_fin = list(interest_neg_fin_sorted[:num_col_classifier]) else: peaks_neg_fin = peaks_neg[:] interest_neg_fin = interest_neg[:] @@ -441,7 +436,10 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl p_g_u = len(y) - int(len(y) / 4.0) if num_col == 3: - if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ((peaks_neg_fin[0] + 200) < p_m and peaks_neg_fin[1] < p_m) or ((peaks_neg_fin[0] - 200) > p_m and peaks_neg_fin[1] > p_m): + if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or ( + peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ( + (peaks_neg_fin[0] + 200) < p_m and peaks_neg_fin[1] < p_m) or ( + (peaks_neg_fin[0] - 200) > p_m and peaks_neg_fin[1] > p_m): num_col = 1 peaks_neg_fin = [] @@ -450,7 +448,7 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl num_col = 1 peaks_neg_fin = [] - ##print(len(peaks_neg_fin)) + # print(len(peaks_neg_fin)) diff_peaks = np.abs(np.diff(peaks_neg_fin)) @@ -487,9 +485,12 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl p_u_quarter = len(y) - p_quarter - ##print(num_col,'early') + # print(num_col,'early') if num_col == 3: - if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and (peaks_neg_true[1] + 200) < p_m) or ((peaks_neg_true[0] - 200) > p_m and peaks_neg_true[1] > p_m): + if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or ( + peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or ( + peaks_neg_true[0] < p_m and (peaks_neg_true[1] + 200) < p_m) or ( + (peaks_neg_true[0] - 200) > p_m and peaks_neg_true[1] > p_m): num_col = 1 peaks_neg_true = [] elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter): @@ -528,18 +529,19 @@ def find_num_col(regions_without_separators, num_col_classifier, tables, multipl # plt.plot([0,len(y)], [grenze,grenze]) # plt.show() - ##print(len(peaks_neg_true)) + # print(len(peaks_neg_true)) return len(peaks_neg_true), peaks_neg_true + def find_num_col_only_image(regions_without_separators, multiplier=3.8): regions_without_separators_0 = regions_without_separators[:, :].sum(axis=0) - ##plt.plot(regions_without_separators_0) - ##plt.show() + # plt.plot(regions_without_separators_0) + # plt.show() sigma_ = 15 - meda_n_updown = regions_without_separators_0[len(regions_without_separators_0) :: -1] + meda_n_updown = regions_without_separators_0[len(regions_without_separators_0):: -1] first_nonzero = next((i for i, x in enumerate(regions_without_separators_0) if x), 0) last_nonzero = next((i for i, x in enumerate(meda_n_updown) if x), 0) @@ -550,7 
+552,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): y_help = np.zeros(len(y) + 20) - y_help[10 : len(y) + 10] = y + y_help[10: len(y) + 10] = y x = np.array(range(len(y))) @@ -558,7 +560,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): zneg = np.zeros(len(zneg_rev) + 20) - zneg[10 : len(zneg_rev) + 10] = zneg_rev + zneg[10: len(zneg_rev) + 10] = zneg_rev z = gaussian_filter1d(y, sigma_) zneg = gaussian_filter1d(zneg, sigma_) @@ -572,7 +574,8 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): peaks_neg = peaks_neg[(peaks_neg > first_nonzero) & (peaks_neg < last_nonzero)] - peaks = peaks[(peaks > 0.09 * regions_without_separators.shape[1]) & (peaks < 0.91 * regions_without_separators.shape[1])] + peaks = peaks[ + (peaks > 0.09 * regions_without_separators.shape[1]) & (peaks < 0.91 * regions_without_separators.shape[1])] peaks_neg = peaks_neg[(peaks_neg > 500) & (peaks_neg < (regions_without_separators.shape[1] - 500))] # print(peaks) @@ -601,7 +604,10 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): p_g_u = len(y) - int(len(y) / 3.0) if num_col == 3: - if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or (peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or (peaks_neg_fin[0] < p_m and peaks_neg_fin[1] < p_m) or (peaks_neg_fin[0] > p_m and peaks_neg_fin[1] > p_m): + if (peaks_neg_fin[0] > p_g_u and peaks_neg_fin[1] > p_g_u) or ( + peaks_neg_fin[0] < p_g_l and peaks_neg_fin[1] < p_g_l) or ( + peaks_neg_fin[0] < p_m and peaks_neg_fin[1] < p_m) or ( + peaks_neg_fin[0] > p_m and peaks_neg_fin[1] > p_m): num_col = 1 else: pass @@ -628,8 +634,7 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): # print(forest[np.argmin(z[forest]) ] ) if not isNaN(forest[np.argmin(z[forest])]): peaks_neg_true.append(forest[np.argmin(z[forest])]) - forest = [] - forest.append(peaks_neg_fin[i + 1]) + forest = [peaks_neg_fin[i + 1]] if i == (len(peaks_neg_fin) - 1): # print(print(forest[np.argmin(z[forest]) ] )) if not isNaN(forest[np.argmin(z[forest])]): @@ -646,7 +651,10 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): p_u_quarter = len(y) - p_quarter if num_col == 3: - if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or (peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or (peaks_neg_true[0] < p_m and peaks_neg_true[1] < p_m) or (peaks_neg_true[0] > p_m and peaks_neg_true[1] > p_m): + if (peaks_neg_true[0] > p_g_u and peaks_neg_true[1] > p_g_u) or ( + peaks_neg_true[0] < p_g_l and peaks_neg_true[1] < p_g_l) or ( + peaks_neg_true[0] < p_m and peaks_neg_true[1] < p_m) or ( + peaks_neg_true[0] > p_m and peaks_neg_true[1] > p_m): num_col = 1 peaks_neg_true = [] elif (peaks_neg_true[0] < p_g_u and peaks_neg_true[0] > p_g_l) and (peaks_neg_true[1] > p_u_quarter): @@ -662,7 +670,8 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): peaks_neg_true = [] if num_col == 4: - if len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or len(np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2: + if len(np.array(peaks_neg_true)[np.array(peaks_neg_true) < p_g_l]) == 2 or len( + np.array(peaks_neg_true)[np.array(peaks_neg_true) > (len(y) - p_g_l)]) == 2: num_col = 1 peaks_neg_true = [] else: @@ -674,7 +683,8 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): for i in range(len(peaks_neg_true)): hill_main = peaks_neg_true[i] # deep_depth=z[peaks_neg] - 
hills_around = peaks_neg_org[((peaks_neg_org > hill_main) & (peaks_neg_org <= hill_main + 400)) | ((peaks_neg_org < hill_main) & (peaks_neg_org >= hill_main - 400))] + hills_around = peaks_neg_org[((peaks_neg_org > hill_main) & (peaks_neg_org <= hill_main + 400)) | ( + (peaks_neg_org < hill_main) & (peaks_neg_org >= hill_main - 400))] deep_depth_around = z[hills_around] # print(hill_main,z[hill_main],hills_around,deep_depth_around,'manoooo') @@ -717,11 +727,12 @@ def find_num_col_only_image(regions_without_separators, multiplier=3.8): return len(peaks_fin_true), peaks_fin_true + def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): regions_without_separators_0 = regions_without_separators[:, :, 0].sum(axis=0) - ##plt.plot(regions_without_separators_0) - ##plt.show() + # plt.plot(regions_without_separators_0) + # plt.show() sigma_ = 35 # 70#35 @@ -732,6 +743,7 @@ def find_num_col_by_vertical_lines(regions_without_separators, multiplier=3.8): # print(peaks,'peaksnew') return peaks + def return_regions_without_separators(regions_pre): kernel = np.ones((5, 5), np.uint8) regions_without_separators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1 @@ -745,7 +757,6 @@ def return_regions_without_separators(regions_pre): def put_drop_out_from_only_drop_model(layout_no_patch, layout1): - drop_only = (layout_no_patch[:, :, 0] == 4) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -753,7 +764,8 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))]) areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1]) - contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.00001] + contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if + areas_cnt_text[jz] > 0.00001] areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.00001] @@ -764,7 +776,7 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): # boxes.append([int(x), int(y), int(w), int(h)]) map_of_drop_contour_bb = np.zeros((layout1.shape[0], layout1.shape[1])) - map_of_drop_contour_bb[y : y + h, x : x + w] = layout1[y : y + h, x : x + w] + map_of_drop_contour_bb[y: y + h, x: x + w] = layout1[y: y + h, x: x + w] if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15: contours_drop_parent_final.append(contours_drop_parent[jj]) @@ -775,8 +787,8 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): return layout_no_patch -def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): +def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): drop_only = (layout_in_patch[:, :, 0] == 4) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -784,7 +796,8 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))]) areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1]) - contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 
0.00001] + contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if + areas_cnt_text[jz] > 0.00001] areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.001] @@ -792,49 +805,47 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) - layout_in_patch[y : y + h, x : x + w, 0] = 4 + layout_in_patch[y: y + h, x: x + w, 0] = 4 return layout_in_patch -def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_textline_polygons,slopes,contours_only_text_parent_d_ordered): - - cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent) - - length_con=x_max_main-x_min_main - height_con=y_max_main-y_min_main +def check_any_text_region_in_model_one_is_main_or_header(regions_model_1, regions_model_full, contours_only_text_parent, + all_box_coord, all_found_textline_polygons, slopes, + contours_only_text_parent_d_ordered): + cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = find_new_features_of_contours( + contours_only_text_parent) + length_con = x_max_main - x_min_main + height_con = y_max_main - y_min_main - all_found_textline_polygons_main=[] - all_found_textline_polygons_head=[] + all_found_textline_polygons_main = [] + all_found_textline_polygons_head = [] - all_box_coord_main=[] - all_box_coord_head=[] + all_box_coord_main = [] + all_box_coord_head = [] - slopes_main=[] - slopes_head=[] + slopes_main = [] + slopes_head = [] - contours_only_text_parent_main=[] - contours_only_text_parent_head=[] + contours_only_text_parent_main = [] + contours_only_text_parent_head = [] - contours_only_text_parent_main_d=[] - contours_only_text_parent_head_d=[] + contours_only_text_parent_main_d = [] + contours_only_text_parent_head_d = [] for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] - img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) + con = contours_only_text_parent[ii] + img = np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) + all_pixels = ((img[:, :, 0] == 255) * 1).sum() + pixels_header = (((img[:, :, 0] == 255) & (regions_model_full[:, :, 0] == 2)) * 1).sum() + pixels_main = all_pixels - pixels_header - all_pixels=((img[:,:,0]==255)*1).sum() - - pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() - pixels_main=all_pixels-pixels_header - - - if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 + if (pixels_header >= pixels_main) and ((length_con[ii] / float(height_con[ii])) >= 1.3): + regions_model_1[:, :][(regions_model_1[:, :] == 1) & (img[:, :, 0] == 255)] = 2 contours_only_text_parent_head.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) @@ -842,7 +853,7 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 + 
regions_model_1[:, :][(regions_model_1[:, :] == 1) & (img[:, :, 0] == 255)] = 1 contours_only_text_parent_main.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) @@ -850,60 +861,62 @@ def check_any_text_region_in_model_one_is_main_or_header(regions_model_1,regions slopes_main.append(slopes[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) - #print(all_pixels,pixels_main,pixels_header) + # print(all_pixels,pixels_main,pixels_header) - return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_textline_polygons_main,all_found_textline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d + return regions_model_1, contours_only_text_parent_main, contours_only_text_parent_head, all_box_coord_main, all_box_coord_head, all_found_textline_polygons_main, all_found_textline_polygons_head, slopes_main, slopes_head, contours_only_text_parent_main_d, contours_only_text_parent_head_d -def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,regions_model_full,contours_only_text_parent,all_box_coord,all_found_textline_polygons,slopes,contours_only_text_parent_d_ordered): - - ### to make it faster +def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1, regions_model_full, + contours_only_text_parent, all_box_coord, + all_found_textline_polygons, slopes, + contours_only_text_parent_d_ordered): + # to make it faster h_o = regions_model_1.shape[0] w_o = regions_model_1.shape[1] - - regions_model_1 = cv2.resize(regions_model_1, (int(regions_model_1.shape[1]/3.), int(regions_model_1.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - regions_model_full = cv2.resize(regions_model_full, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent = [ (i/3.).astype(np.int32) for i in contours_only_text_parent] - ### - - cx_main,cy_main ,x_min_main , x_max_main, y_min_main ,y_max_main,y_corr_x_min_from_argmin=find_new_features_of_contours(contours_only_text_parent) + regions_model_1 = cv2.resize(regions_model_1, + (int(regions_model_1.shape[1] / 3.), int(regions_model_1.shape[0] / 3.)), + interpolation=cv2.INTER_NEAREST) + regions_model_full = cv2.resize(regions_model_full, + (int(regions_model_full.shape[1] / 3.), int(regions_model_full.shape[0] / 3.)), + interpolation=cv2.INTER_NEAREST) + contours_only_text_parent = [(i / 3.).astype(np.int32) for i in contours_only_text_parent] - length_con=x_max_main-x_min_main - height_con=y_max_main-y_min_main + ### + cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin = find_new_features_of_contours( + contours_only_text_parent) + length_con = x_max_main - x_min_main + height_con = y_max_main - y_min_main - all_found_textline_polygons_main=[] - all_found_textline_polygons_head=[] + all_found_textline_polygons_main = [] + all_found_textline_polygons_head = [] - all_box_coord_main=[] - all_box_coord_head=[] + all_box_coord_main = [] + all_box_coord_head = [] - slopes_main=[] - slopes_head=[] + slopes_main = [] + slopes_head = [] - contours_only_text_parent_main=[] - contours_only_text_parent_head=[] + contours_only_text_parent_main = [] + contours_only_text_parent_head = [] - contours_only_text_parent_main_d=[] - contours_only_text_parent_head_d=[] + 
contours_only_text_parent_main_d = [] + contours_only_text_parent_head_d = [] for ii in range(len(contours_only_text_parent)): - con=contours_only_text_parent[ii] - img=np.zeros((regions_model_1.shape[0],regions_model_1.shape[1],3)) + con = contours_only_text_parent[ii] + img = np.zeros((regions_model_1.shape[0], regions_model_1.shape[1], 3)) img = cv2.fillPoly(img, pts=[con], color=(255, 255, 255)) + all_pixels = ((img[:, :, 0] == 255) * 1).sum() + pixels_header = (((img[:, :, 0] == 255) & (regions_model_full[:, :, 0] == 2)) * 1).sum() + pixels_main = all_pixels - pixels_header - all_pixels=((img[:,:,0]==255)*1).sum() - - pixels_header=( ( (img[:,:,0]==255) & (regions_model_full[:,:,0]==2) )*1 ).sum() - pixels_main=all_pixels-pixels_header - - - if (pixels_header>=pixels_main) and ( (length_con[ii]/float(height_con[ii]) )>=1.3 ): - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=2 + if (pixels_header >= pixels_main) and ((length_con[ii] / float(height_con[ii])) >= 1.3): + regions_model_1[:, :][(regions_model_1[:, :] == 1) & (img[:, :, 0] == 255)] = 2 contours_only_text_parent_head.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_head_d.append(contours_only_text_parent_d_ordered[ii]) @@ -911,7 +924,7 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r slopes_head.append(slopes[ii]) all_found_textline_polygons_head.append(all_found_textline_polygons[ii]) else: - regions_model_1[:,:][(regions_model_1[:,:]==1) & (img[:,:,0]==255) ]=1 + regions_model_1[:, :][(regions_model_1[:, :] == 1) & (img[:, :, 0] == 255)] = 1 contours_only_text_parent_main.append(con) if contours_only_text_parent_d_ordered is not None: contours_only_text_parent_main_d.append(contours_only_text_parent_d_ordered[ii]) @@ -919,19 +932,18 @@ def check_any_text_region_in_model_one_is_main_or_header_light(regions_model_1,r slopes_main.append(slopes[ii]) all_found_textline_polygons_main.append(all_found_textline_polygons[ii]) - #print(all_pixels,pixels_main,pixels_header) - + # print(all_pixels,pixels_main,pixels_header) + # to make it faster - ### to make it faster - regions_model_1 = cv2.resize(regions_model_1, (w_o, h_o), interpolation=cv2.INTER_NEAREST) - #regions_model_full = cv2.resize(img, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) - contours_only_text_parent_head = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_head] - contours_only_text_parent_main = [ (i*3.).astype(np.int32) for i in contours_only_text_parent_main] + # regions_model_full = cv2.resize(img, (int(regions_model_full.shape[1]/3.), int(regions_model_full.shape[0]/3.)), interpolation=cv2.INTER_NEAREST) + contours_only_text_parent_head = [(i * 3.).astype(np.int32) for i in contours_only_text_parent_head] + contours_only_text_parent_main = [(i * 3.).astype(np.int32) for i in contours_only_text_parent_main] ### - - return regions_model_1,contours_only_text_parent_main,contours_only_text_parent_head,all_box_coord_main,all_box_coord_head,all_found_textline_polygons_main,all_found_textline_polygons_head,slopes_main,slopes_head,contours_only_text_parent_main_d,contours_only_text_parent_head_d + + return regions_model_1, contours_only_text_parent_main, contours_only_text_parent_head, all_box_coord_main, all_box_coord_head, all_found_textline_polygons_main, all_found_textline_polygons_head, slopes_main, slopes_head, contours_only_text_parent_main_d, contours_only_text_parent_head_d + def 
small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col): # print(textlines_con) @@ -950,11 +962,11 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) textlines_tot.append(np.array(textlines_con[m1][nn], dtype=np.int32)) textlines_tot_org_form.append(textlines_con[m1][nn]) - ##img_text_all=np.zeros((textline_iamge.shape[0],textline_iamge.shape[1])) - ##img_text_all=cv2.fillPoly(img_text_all, pts =textlines_tot , color=(1,1,1)) + # img_text_all=np.zeros((textline_iamge.shape[0],textline_iamge.shape[1])) + # img_text_all=cv2.fillPoly(img_text_all, pts =textlines_tot , color=(1,1,1)) - ##plt.imshow(img_text_all) - ##plt.show() + # plt.imshow(img_text_all) + # plt.show() areas_cnt_text = np.array([cv2.contourArea(textlines_tot[j]) for j in range(len(textlines_tot))]) areas_cnt_text = areas_cnt_text / float(textline_iamge.shape[0] * textline_iamge.shape[1]) indexes_textlines = np.array(range(len(textlines_tot))) @@ -1066,10 +1078,10 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col) textlines_con_changed.append(textlines_big_org_form) return textlines_con_changed -def order_of_regions(textline_mask, contours_main, contours_header, y_ref): - ##plt.imshow(textline_mask) - ##plt.show() +def order_of_regions(textline_mask, contours_main, contours_header, y_ref): + # plt.imshow(textline_mask) + # plt.show() """ print(len(contours_main),'contours_main') mada_n=textline_mask.sum(axis=1) @@ -1107,7 +1119,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): y = textline_sum_along_width[:] y_padded = np.zeros(len(y) + 40) - y_padded[20 : len(y) + 20] = y + y_padded[20: len(y) + 20] = y x = np.array(range(len(y))) peaks_real, _ = find_peaks(gaussian_filter1d(y, 3), height=0) @@ -1118,7 +1130,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): zneg_rev = -y_padded + np.max(y_padded) zneg = np.zeros(len(zneg_rev) + 40) - zneg[20 : len(zneg_rev) + 20] = zneg_rev + zneg[20: len(zneg_rev) + 20] = zneg_rev zneg = gaussian_filter1d(zneg, sigma_gaus) peaks, _ = find_peaks(z, height=0) @@ -1127,8 +1139,8 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): peaks_neg = peaks_neg - 20 - 20 peaks = peaks - 20 - ##plt.plot(z) - ##plt.show() + # plt.plot(z) + # plt.show() if contours_main != None: areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) @@ -1178,16 +1190,16 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): matrix_of_orders[:, 0] = np.array(range(len(contours_main) + len(contours_header))) matrix_of_orders[: len(contours_main), 1] = 1 - matrix_of_orders[len(contours_main) :, 1] = 2 + matrix_of_orders[len(contours_main):, 1] = 2 matrix_of_orders[: len(contours_main), 2] = cx_main - matrix_of_orders[len(contours_main) :, 2] = cx_header + matrix_of_orders[len(contours_main):, 2] = cx_header matrix_of_orders[: len(contours_main), 3] = cy_main - matrix_of_orders[len(contours_main) :, 3] = cy_header + matrix_of_orders[len(contours_main):, 3] = cy_header matrix_of_orders[: len(contours_main), 4] = np.array(range(len(contours_main))) - matrix_of_orders[len(contours_main) :, 4] = np.array(range(len(contours_header))) + matrix_of_orders[len(contours_main):, 4] = np.array(range(len(contours_header))) # print(peaks_neg_new,'peaks_neg_new') @@ -1202,11 +1214,12 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): # print(top,down,'topdown') - indexes_in = 
matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] + indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & (matrix_of_orders[:, 3] < down)] + cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & (matrix_of_orders[:, 3] < down)] + cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & (matrix_of_orders[:, 3] < down)] + types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & (matrix_of_orders[:, 3] < down)] + index_types_of_text = matrix_of_orders[:, 4][ + (matrix_of_orders[:, 3] >= top) & (matrix_of_orders[:, 3] < down)] # print(top,down) # print(cys_in,'cyyyins') @@ -1222,7 +1235,7 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): final_types.append(int(ind_in_type[j])) final_index_type.append(int(ind_ind_type[j])) - ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:] + # matrix_of_orders[:len_main,4]=final_indexers_sorted[:] # print(peaks_neg_new,'peaks') # print(final_indexers_sorted,'indexsorted') @@ -1231,103 +1244,109 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): return final_indexers_sorted, matrix_of_orders, final_types, final_index_type -def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier): - #img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2) - img_p_in_ver=img_p_in_ver.astype(np.uint8) - img_p_in_ver=np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2) + +def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor, + num_col_classifier): + # img_p_in_ver = cv2.erode(img_p_in_ver, self.kernel, iterations=2) + img_p_in_ver = img_p_in_ver.astype(np.uint8) + img_p_in_ver = np.repeat(img_p_in_ver[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(img_p_in_ver, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_lines_ver,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - - slope_lines_ver,dist_x_ver, x_min_main_ver ,x_max_main_ver ,cy_main_ver,slope_lines_org_ver,y_min_main_ver, y_max_main_ver, cx_main_ver=find_features_of_lines(contours_lines_ver) - + contours_lines_ver, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + slope_lines_ver, dist_x_ver, x_min_main_ver, x_max_main_ver, cy_main_ver, slope_lines_org_ver, y_min_main_ver, y_max_main_ver, cx_main_ver = find_features_of_lines( + contours_lines_ver) + for i in range(len(x_min_main_ver)): - img_p_in_ver[int(y_min_main_ver[i]):int(y_min_main_ver[i])+30,int(cx_main_ver[i])-25:int(cx_main_ver[i])+25,0]=0 - img_p_in_ver[int(y_max_main_ver[i])-30:int(y_max_main_ver[i]),int(cx_main_ver[i])-25:int(cx_main_ver[i])+25,0]=0 - - - img_in_hor=img_in_hor.astype(np.uint8) - img_in_hor=np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2) + img_p_in_ver[int(y_min_main_ver[i]):int(y_min_main_ver[i]) + 30, + int(cx_main_ver[i]) - 25:int(cx_main_ver[i]) + 25, 0] = 0 + img_p_in_ver[int(y_max_main_ver[i]) - 30:int(y_max_main_ver[i]), + int(cx_main_ver[i]) - 
25:int(cx_main_ver[i]) + 25, 0] = 0 + + img_in_hor = img_in_hor.astype(np.uint8) + img_in_hor = np.repeat(img_in_hor[:, :, np.newaxis], 3, axis=2) imgray = cv2.cvtColor(img_in_hor, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_lines_hor,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - - slope_lines_hor,dist_x_hor, x_min_main_hor ,x_max_main_hor ,cy_main_hor,slope_lines_org_hor,y_min_main_hor, y_max_main_hor, cx_main_hor=find_features_of_lines(contours_lines_hor) - - - x_width_smaller_than_acolumn_width=img_in_hor.shape[1]/float(num_col_classifier+1.) - - len_lines_bigger_than_x_width_smaller_than_acolumn_width=len( dist_x_hor[dist_x_hor>=x_width_smaller_than_acolumn_width] ) - - len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column=int( len_lines_bigger_than_x_width_smaller_than_acolumn_width/float(num_col_classifier) ) - - - if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column<10: - args_hor=np.array( range(len(slope_lines_hor) )) - all_args_uniq=contours_in_same_horizon(cy_main_hor) - #print(all_args_uniq,'all_args_uniq') - if len(all_args_uniq)>0: + contours_lines_hor, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + slope_lines_hor, dist_x_hor, x_min_main_hor, x_max_main_hor, cy_main_hor, slope_lines_org_hor, y_min_main_hor, y_max_main_hor, cx_main_hor = find_features_of_lines( + contours_lines_hor) + + x_width_smaller_than_acolumn_width = img_in_hor.shape[1] / float(num_col_classifier + 1.) + + len_lines_bigger_than_x_width_smaller_than_acolumn_width = len( + dist_x_hor[dist_x_hor >= x_width_smaller_than_acolumn_width]) + + len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column = int( + len_lines_bigger_than_x_width_smaller_than_acolumn_width / float(num_col_classifier)) + + if len_lines_bigger_than_x_width_smaller_than_acolumn_width_per_column < 10: + args_hor = np.array(range(len(slope_lines_hor))) + all_args_uniq = contours_in_same_horizon(cy_main_hor) + # print(all_args_uniq,'all_args_uniq') + if len(all_args_uniq) > 0: if type(all_args_uniq[0]) is list: - special_separators=[] - contours_new=[] + special_separators = [] + contours_new = [] for dd in range(len(all_args_uniq)): - merged_all=None - some_args=args_hor[all_args_uniq[dd]] - some_cy=cy_main_hor[all_args_uniq[dd]] - some_x_min=x_min_main_hor[all_args_uniq[dd]] - some_x_max=x_max_main_hor[all_args_uniq[dd]] - - #img_in=np.zeros(separators_closeup_n[:,:,2].shape) - #print(img_p_in_ver.shape[1],some_x_max-some_x_min,'xdiff') - diff_x_some=some_x_max-some_x_min + merged_all = None + some_args = args_hor[all_args_uniq[dd]] + some_cy = cy_main_hor[all_args_uniq[dd]] + some_x_min = x_min_main_hor[all_args_uniq[dd]] + some_x_max = x_max_main_hor[all_args_uniq[dd]] + + # img_in=np.zeros(separators_closeup_n[:,:,2].shape) + # print(img_p_in_ver.shape[1],some_x_max-some_x_min,'xdiff') + diff_x_some = some_x_max - some_x_min for jv in range(len(some_args)): - img_p_in=cv2.fillPoly(img_in_hor, pts =[contours_lines_hor[some_args[jv]]], color=(1,1,1)) - - if any(i_diff>(img_p_in_ver.shape[1]/float(3.3)) for i_diff in diff_x_some): - img_p_in[int(np.mean(some_cy))-5:int(np.mean(some_cy))+5, int(np.min(some_x_min)):int(np.max(some_x_max)) ]=1 - - sum_dis=dist_x_hor[some_args].sum() - diff_max_min_uniques=np.max(x_max_main_hor[some_args])-np.min(x_min_main_hor[some_args]) - - - if diff_max_min_uniques>sum_dis and ( (sum_dis/float(diff_max_min_uniques) ) >0.85 ) and ( 
(diff_max_min_uniques/float(img_p_in_ver.shape[1]))>0.85 ) and np.std( dist_x_hor[some_args] )<(0.55*np.mean( dist_x_hor[some_args] )): - #print(dist_x_hor[some_args],dist_x_hor[some_args].sum(),np.min(x_min_main_hor[some_args]) ,np.max(x_max_main_hor[some_args]),'jalibdi') - #print(np.mean( dist_x_hor[some_args] ),np.std( dist_x_hor[some_args] ),np.var( dist_x_hor[some_args] ),'jalibdiha') + img_p_in = cv2.fillPoly(img_in_hor, pts=[contours_lines_hor[some_args[jv]]], color=(1, 1, 1)) + + if any(i_diff > (img_p_in_ver.shape[1] / float(3.3)) for i_diff in diff_x_some): + img_p_in[int(np.mean(some_cy)) - 5:int(np.mean(some_cy)) + 5, + int(np.min(some_x_min)):int(np.max(some_x_max))] = 1 + + sum_dis = dist_x_hor[some_args].sum() + diff_max_min_uniques = np.max(x_max_main_hor[some_args]) - np.min(x_min_main_hor[some_args]) + + if diff_max_min_uniques > sum_dis and ((sum_dis / float(diff_max_min_uniques)) > 0.85) and ( + (diff_max_min_uniques / float(img_p_in_ver.shape[1])) > 0.85) and np.std( + dist_x_hor[some_args]) < (0.55 * np.mean(dist_x_hor[some_args])): + # print(dist_x_hor[some_args],dist_x_hor[some_args].sum(),np.min(x_min_main_hor[some_args]) ,np.max(x_max_main_hor[some_args]),'jalibdi') + # print(np.mean( dist_x_hor[some_args] ),np.std( dist_x_hor[some_args] ),np.var( dist_x_hor[some_args] ),'jalibdiha') special_separators.append(np.mean(cy_main_hor[some_args])) else: - img_p_in=img_in_hor - special_separators=[] + img_p_in = img_in_hor + special_separators = [] else: - img_p_in=img_in_hor - special_separators=[] + img_p_in = img_in_hor + special_separators = [] - - img_p_in_ver[:,:,0][img_p_in_ver[:,:,0]==255]=1 - sep_ver_hor=img_p_in+img_p_in_ver + img_p_in_ver[:, :, 0][img_p_in_ver[:, :, 0] == 255] = 1 + sep_ver_hor = img_p_in + img_p_in_ver + sep_ver_hor_cross = (sep_ver_hor[:, :, 0] == 2) * 1 - sep_ver_hor_cross=(sep_ver_hor[:,:,0]==2)*1 - - sep_ver_hor_cross=np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2) - sep_ver_hor_cross=sep_ver_hor_cross.astype(np.uint8) + sep_ver_hor_cross = np.repeat(sep_ver_hor_cross[:, :, np.newaxis], 3, axis=2) + sep_ver_hor_cross = sep_ver_hor_cross.astype(np.uint8) imgray = cv2.cvtColor(sep_ver_hor_cross, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - contours_cross,_=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - - cx_cross,cy_cross ,_ , _, _ ,_,_=find_new_features_of_contours(contours_cross) - + contours_cross, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cx_cross, cy_cross, _, _, _, _, _ = find_new_features_of_contours(contours_cross) + for ii in range(len(cx_cross)): - img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])+5:int(cx_cross[ii])+40,0]=0 - img_p_in[int(cy_cross[ii])-30:int(cy_cross[ii])+30,int(cx_cross[ii])-40:int(cx_cross[ii])-4,0]=0 - + img_p_in[int(cy_cross[ii]) - 30:int(cy_cross[ii]) + 30, int(cx_cross[ii]) + 5:int(cx_cross[ii]) + 40, 0] = 0 + img_p_in[int(cy_cross[ii]) - 30:int(cy_cross[ii]) + 30, int(cx_cross[ii]) - 40:int(cx_cross[ii]) - 4, 0] = 0 + else: - img_p_in=np.copy(img_in_hor) - special_separators=[] - return img_p_in[:,:,0],special_separators + img_p_in = np.copy(img_in_hor) + special_separators = [] + return img_p_in[:, :, 0], special_separators + def return_points_with_boundies(peaks_neg_fin, first_point, last_point): peaks_neg_tot = [] @@ -1337,102 +1356,95 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point): peaks_neg_tot.append(last_point) return peaks_neg_tot + def 
find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): + separators_closeup = (region_pre_p[:, :, :] == pixel_lines) * 1 - separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1 - - separators_closeup[0:110,:,:]=0 - separators_closeup[separators_closeup.shape[0]-150:,:,:]=0 - - kernel = np.ones((5,5),np.uint8) - - separators_closeup=separators_closeup.astype(np.uint8) - separators_closeup = cv2.dilate(separators_closeup,kernel,iterations = 1) - separators_closeup = cv2.erode(separators_closeup,kernel,iterations = 1) - - - separators_closeup_new=np.zeros((separators_closeup.shape[0] ,separators_closeup.shape[1] )) - - - - ##_,separators_closeup_n=self.combine_hor_lines_and_delete_cross_points_and_get_lines_features_back(region_pre_p[:,:,0]) - separators_closeup_n=np.copy(separators_closeup) - - separators_closeup_n=separators_closeup_n.astype(np.uint8) - ##plt.imshow(separators_closeup_n[:,:,0]) - ##plt.show() - - separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) - separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] - - separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 - #separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==0]=255 - #separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==-255]=0 - - - #separators_closeup_n_binary=(separators_closeup_n_binary[:,:]==2)*1 - - #gray = cv2.cvtColor(separators_closeup_n, cv2.COLOR_BGR2GRAY) - - ### - - #print(separators_closeup_n_binary.shape) - gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) - gray_early=gray_early.astype(np.uint8) - - #print(gray_early.shape,'burda') + separators_closeup[0:110, :, :] = 0 + separators_closeup[separators_closeup.shape[0] - 150:, :, :] = 0 + + kernel = np.ones((5, 5), np.uint8) + + separators_closeup = separators_closeup.astype(np.uint8) + separators_closeup = cv2.dilate(separators_closeup, kernel, iterations=1) + separators_closeup = cv2.erode(separators_closeup, kernel, iterations=1) + + separators_closeup_new = np.zeros((separators_closeup.shape[0], separators_closeup.shape[1])) + + # _,separators_closeup_n=self.combine_hor_lines_and_delete_cross_points_and_get_lines_features_back(region_pre_p[:,:,0]) + separators_closeup_n = np.copy(separators_closeup) + + separators_closeup_n = separators_closeup_n.astype(np.uint8) + # plt.imshow(separators_closeup_n[:,:,0]) + # plt.show() + + separators_closeup_n_binary = np.zeros((separators_closeup_n.shape[0], separators_closeup_n.shape[1])) + separators_closeup_n_binary[:, :] = separators_closeup_n[:, :, 0] + + separators_closeup_n_binary[:, :][separators_closeup_n_binary[:, :] != 0] = 1 + # separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==0]=255 + # separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==-255]=0 + + # separators_closeup_n_binary=(separators_closeup_n_binary[:,:]==2)*1 + + # gray = cv2.cvtColor(separators_closeup_n, cv2.COLOR_BGR2GRAY) + + # ## + + # print(separators_closeup_n_binary.shape) + gray_early = np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) + gray_early = gray_early.astype(np.uint8) + + # print(gray_early.shape,'burda') imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY) - #print('burda2') + # print('burda2') ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0) - - #print('burda3') - contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - - 
#slope_lines_e,dist_x_e, x_min_main_e ,x_max_main_e ,cy_main_e,slope_lines_org_e,y_min_main_e, y_max_main_e, cx_main_e=self.find_features_of_lines(contours_line_e) - - slope_linese,dist_xe, x_min_maine ,x_max_maine ,cy_maine,slope_lines_orge,y_min_maine, y_max_maine, cx_maine=find_features_of_lines(contours_line_e) - - dist_ye=y_max_maine-y_min_maine - #print(y_max_maine-y_min_maine,'y') - #print(dist_xe,'x') - - - args_e=np.array(range(len(contours_line_e))) - args_hor_e=args_e[(dist_ye<=50) & (dist_xe>=3*dist_ye)] - - #print(args_hor_e,'jidi',len(args_hor_e),'jilva') - - cnts_hor_e=[] + + # print('burda3') + contours_line_e, hierarchy_e = cv2.findContours(thresh_e, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + # slope_lines_e,dist_x_e, x_min_main_e ,x_max_main_e ,cy_main_e,slope_lines_org_e,y_min_main_e, y_max_main_e, cx_main_e=self.find_features_of_lines(contours_line_e) + + slope_linese, dist_xe, x_min_maine, x_max_maine, cy_maine, slope_lines_orge, y_min_maine, y_max_maine, cx_maine = find_features_of_lines( + contours_line_e) + + dist_ye = y_max_maine - y_min_maine + # print(y_max_maine-y_min_maine,'y') + # print(dist_xe,'x') + + args_e = np.array(range(len(contours_line_e))) + args_hor_e = args_e[(dist_ye <= 50) & (dist_xe >= 3 * dist_ye)] + + # print(args_hor_e,'jidi',len(args_hor_e),'jilva') + + cnts_hor_e = [] for ce in args_hor_e: cnts_hor_e.append(contours_line_e[ce]) - #print(len(slope_linese),'lieee') - - figs_e=np.zeros(thresh_e.shape) - figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1)) - - #plt.imshow(figs_e) - #plt.show() - + # print(len(slope_linese),'lieee') + + figs_e = np.zeros(thresh_e.shape) + figs_e = cv2.fillPoly(figs_e, pts=cnts_hor_e, color=(1, 1, 1)) + + # plt.imshow(figs_e) + # plt.show() + ### - - separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary,pts=cnts_hor_e,color=(0,0,0)) - + + separators_closeup_n_binary = cv2.fillPoly(separators_closeup_n_binary, pts=cnts_hor_e, color=(0, 0, 0)) + gray = cv2.bitwise_not(separators_closeup_n_binary) - gray=gray.astype(np.uint8) - - - #plt.imshow(gray) - #plt.show() - - - bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, \ - cv2.THRESH_BINARY, 15, -2) - ##plt.imshow(bw[:,:]) - ##plt.show() - + gray = gray.astype(np.uint8) + + # plt.imshow(gray) + # plt.show() + + bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, -2) + # plt.imshow(bw[:,:]) + # plt.show() + horizontal = np.copy(bw) vertical = np.copy(bw) - + cols = horizontal.shape[1] horizontal_size = cols // 30 # Create structure element for extracting horizontal lines through morphology operations @@ -1441,23 +1453,19 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, horizontal = cv2.erode(horizontal, horizontalStructure) horizontal = cv2.dilate(horizontal, horizontalStructure) - kernel = np.ones((5,5),np.uint8) + kernel = np.ones((5, 5), np.uint8) + horizontal = cv2.dilate(horizontal, kernel, iterations=2) + horizontal = cv2.erode(horizontal, kernel, iterations=2) - horizontal = cv2.dilate(horizontal,kernel,iterations = 2) - horizontal = cv2.erode(horizontal,kernel,iterations = 2) - - ### - #print(np.unique(horizontal),'uni') - horizontal=cv2.fillPoly(horizontal,pts=cnts_hor_e,color=(255,255,255)) + # print(np.unique(horizontal),'uni') + horizontal = cv2.fillPoly(horizontal, pts=cnts_hor_e, color=(255, 255, 255)) ### - - - - #plt.imshow(horizontal) - #plt.show() - + + # plt.imshow(horizontal) + # plt.show() + rows = vertical.shape[0] verticalsize = rows // 30 
# Create structure element for extracting vertical lines through morphology operations @@ -1465,655 +1473,652 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, # Apply morphology operations vertical = cv2.erode(vertical, verticalStructure) vertical = cv2.dilate(vertical, verticalStructure) - - vertical = cv2.dilate(vertical,kernel,iterations = 1) + + vertical = cv2.dilate(vertical, kernel, iterations=1) # Show extracted vertical lines - horizontal,special_separators=combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(vertical,horizontal,num_col_classifier) - - - #plt.imshow(horizontal) - #plt.show() - #print(vertical.shape,np.unique(vertical),'verticalvertical') - separators_closeup_new[:,:][vertical[:,:]!=0]=1 - separators_closeup_new[:,:][horizontal[:,:]!=0]=1 - - ##plt.imshow(separators_closeup_new) - ##plt.show() - ##separators_closeup_n - vertical=np.repeat(vertical[:, :, np.newaxis], 3, axis=2) - vertical=vertical.astype(np.uint8) - - ##plt.plot(vertical[:,:,0].sum(axis=0)) - ##plt.show() - - #plt.plot(vertical[:,:,0].sum(axis=1)) - #plt.show() + horizontal, special_separators = combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(vertical, + horizontal, + num_col_classifier) + + # plt.imshow(horizontal) + # plt.show() + # print(vertical.shape,np.unique(vertical),'verticalvertical') + separators_closeup_new[:, :][vertical[:, :] != 0] = 1 + separators_closeup_new[:, :][horizontal[:, :] != 0] = 1 + + # plt.imshow(separators_closeup_new) + # plt.show() + # separators_closeup_n + vertical = np.repeat(vertical[:, :, np.newaxis], 3, axis=2) + vertical = vertical.astype(np.uint8) + + # plt.plot(vertical[:,:,0].sum(axis=0)) + # plt.show() + + # plt.plot(vertical[:,:,0].sum(axis=1)) + # plt.show() imgray = cv2.cvtColor(vertical, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_vers) - #print(slope_lines,'vertical') - args=np.array( range(len(slope_lines) )) - args_ver=args[slope_lines==1] - dist_x_ver=dist_x[slope_lines==1] - y_min_main_ver=y_min_main[slope_lines==1] - y_max_main_ver=y_max_main[slope_lines==1] - x_min_main_ver=x_min_main[slope_lines==1] - x_max_main_ver=x_max_main[slope_lines==1] - cx_main_ver=cx_main[slope_lines==1] - dist_y_ver=y_max_main_ver-y_min_main_ver - len_y=separators_closeup.shape[0]/3.0 - - - #plt.imshow(horizontal) - #plt.show() - - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) - horizontal=horizontal.astype(np.uint8) + + contours_line_vers, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = find_features_of_lines( + contours_line_vers) + # print(slope_lines,'vertical') + args = np.array(range(len(slope_lines))) + args_ver = args[slope_lines == 1] + dist_x_ver = dist_x[slope_lines == 1] + y_min_main_ver = y_min_main[slope_lines == 1] + y_max_main_ver = y_max_main[slope_lines == 1] + x_min_main_ver = x_min_main[slope_lines == 1] + x_max_main_ver = x_max_main[slope_lines == 1] + cx_main_ver = cx_main[slope_lines == 1] + dist_y_ver = y_max_main_ver - y_min_main_ver + len_y = separators_closeup.shape[0] / 3.0 + + # plt.imshow(horizontal) + # plt.show() + + horizontal = np.repeat(horizontal[:, :, 
np.newaxis], 3, axis=2) + horizontal = horizontal.astype(np.uint8) imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) - - contours_line_hors,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_hors) - - slope_lines_org_hor=slope_lines_org[slope_lines==0] - args=np.array( range(len(slope_lines) )) - len_x=separators_closeup.shape[1]/5.0 - - dist_y=np.abs(y_max_main-y_min_main) - - args_hor=args[slope_lines==0] - dist_x_hor=dist_x[slope_lines==0] - y_min_main_hor=y_min_main[slope_lines==0] - y_max_main_hor=y_max_main[slope_lines==0] - x_min_main_hor=x_min_main[slope_lines==0] - x_max_main_hor=x_max_main[slope_lines==0] - dist_y_hor=dist_y[slope_lines==0] - cy_main_hor=cy_main[slope_lines==0] - - args_hor=args_hor[dist_x_hor>=len_x/2.0] - x_max_main_hor=x_max_main_hor[dist_x_hor>=len_x/2.0] - x_min_main_hor=x_min_main_hor[dist_x_hor>=len_x/2.0] - cy_main_hor=cy_main_hor[dist_x_hor>=len_x/2.0] - y_min_main_hor=y_min_main_hor[dist_x_hor>=len_x/2.0] - y_max_main_hor=y_max_main_hor[dist_x_hor>=len_x/2.0] - dist_y_hor=dist_y_hor[dist_x_hor>=len_x/2.0] - - slope_lines_org_hor=slope_lines_org_hor[dist_x_hor>=len_x/2.0] - dist_x_hor=dist_x_hor[dist_x_hor>=len_x/2.0] - - - matrix_of_lines_ch=np.zeros((len(cy_main_hor)+len(cx_main_ver),10)) - - matrix_of_lines_ch[:len(cy_main_hor),0]=args_hor - matrix_of_lines_ch[len(cy_main_hor):,0]=args_ver - - - matrix_of_lines_ch[len(cy_main_hor):,1]=cx_main_ver - - matrix_of_lines_ch[:len(cy_main_hor),2]=x_min_main_hor+50#x_min_main_hor+150 - matrix_of_lines_ch[len(cy_main_hor):,2]=x_min_main_ver - - matrix_of_lines_ch[:len(cy_main_hor),3]=x_max_main_hor-50#x_max_main_hor-150 - matrix_of_lines_ch[len(cy_main_hor):,3]=x_max_main_ver - - matrix_of_lines_ch[:len(cy_main_hor),4]=dist_x_hor - matrix_of_lines_ch[len(cy_main_hor):,4]=dist_x_ver - - matrix_of_lines_ch[:len(cy_main_hor),5]=cy_main_hor - - - matrix_of_lines_ch[:len(cy_main_hor),6]=y_min_main_hor - matrix_of_lines_ch[len(cy_main_hor):,6]=y_min_main_ver - - matrix_of_lines_ch[:len(cy_main_hor),7]=y_max_main_hor - matrix_of_lines_ch[len(cy_main_hor):,7]=y_max_main_ver - - matrix_of_lines_ch[:len(cy_main_hor),8]=dist_y_hor - matrix_of_lines_ch[len(cy_main_hor):,8]=dist_y_ver - - - matrix_of_lines_ch[len(cy_main_hor):,9]=1 - - - + + contours_line_hors, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + slope_lines, dist_x, x_min_main, x_max_main, cy_main, slope_lines_org, y_min_main, y_max_main, cx_main = find_features_of_lines( + contours_line_hors) + + slope_lines_org_hor = slope_lines_org[slope_lines == 0] + args = np.array(range(len(slope_lines))) + len_x = separators_closeup.shape[1] / 5.0 + + dist_y = np.abs(y_max_main - y_min_main) + + args_hor = args[slope_lines == 0] + dist_x_hor = dist_x[slope_lines == 0] + y_min_main_hor = y_min_main[slope_lines == 0] + y_max_main_hor = y_max_main[slope_lines == 0] + x_min_main_hor = x_min_main[slope_lines == 0] + x_max_main_hor = x_max_main[slope_lines == 0] + dist_y_hor = dist_y[slope_lines == 0] + cy_main_hor = cy_main[slope_lines == 0] + + args_hor = args_hor[dist_x_hor >= len_x / 2.0] + x_max_main_hor = x_max_main_hor[dist_x_hor >= len_x / 2.0] + x_min_main_hor = x_min_main_hor[dist_x_hor >= len_x / 2.0] + cy_main_hor = cy_main_hor[dist_x_hor >= len_x / 2.0] + y_min_main_hor = y_min_main_hor[dist_x_hor >= len_x / 2.0] + 
y_max_main_hor = y_max_main_hor[dist_x_hor >= len_x / 2.0] + dist_y_hor = dist_y_hor[dist_x_hor >= len_x / 2.0] + + slope_lines_org_hor = slope_lines_org_hor[dist_x_hor >= len_x / 2.0] + dist_x_hor = dist_x_hor[dist_x_hor >= len_x / 2.0] + + matrix_of_lines_ch = np.zeros((len(cy_main_hor) + len(cx_main_ver), 10)) + + matrix_of_lines_ch[:len(cy_main_hor), 0] = args_hor + matrix_of_lines_ch[len(cy_main_hor):, 0] = args_ver + + matrix_of_lines_ch[len(cy_main_hor):, 1] = cx_main_ver + + matrix_of_lines_ch[:len(cy_main_hor), 2] = x_min_main_hor + 50 # x_min_main_hor+150 + matrix_of_lines_ch[len(cy_main_hor):, 2] = x_min_main_ver + + matrix_of_lines_ch[:len(cy_main_hor), 3] = x_max_main_hor - 50 # x_max_main_hor-150 + matrix_of_lines_ch[len(cy_main_hor):, 3] = x_max_main_ver + + matrix_of_lines_ch[:len(cy_main_hor), 4] = dist_x_hor + matrix_of_lines_ch[len(cy_main_hor):, 4] = dist_x_ver + + matrix_of_lines_ch[:len(cy_main_hor), 5] = cy_main_hor + + matrix_of_lines_ch[:len(cy_main_hor), 6] = y_min_main_hor + matrix_of_lines_ch[len(cy_main_hor):, 6] = y_min_main_ver + + matrix_of_lines_ch[:len(cy_main_hor), 7] = y_max_main_hor + matrix_of_lines_ch[len(cy_main_hor):, 7] = y_max_main_ver + + matrix_of_lines_ch[:len(cy_main_hor), 8] = dist_y_hor + matrix_of_lines_ch[len(cy_main_hor):, 8] = dist_y_ver + + matrix_of_lines_ch[len(cy_main_hor):, 9] = 1 + if contours_h is not None: - slope_lines_head,dist_x_head, x_min_main_head ,x_max_main_head ,cy_main_head,slope_lines_org_head,y_min_main_head, y_max_main_head, cx_main_head=find_features_of_lines(contours_h) - matrix_l_n=np.zeros((matrix_of_lines_ch.shape[0]+len(cy_main_head),matrix_of_lines_ch.shape[1])) - matrix_l_n[:matrix_of_lines_ch.shape[0],:]=np.copy(matrix_of_lines_ch[:,:]) - args_head=np.array(range(len(cy_main_head)))+len(cy_main_hor) - - matrix_l_n[matrix_of_lines_ch.shape[0]:,0]=args_head - matrix_l_n[matrix_of_lines_ch.shape[0]:,2]=x_min_main_head+30 - matrix_l_n[matrix_of_lines_ch.shape[0]:,3]=x_max_main_head-30 - - matrix_l_n[matrix_of_lines_ch.shape[0]:,4]=dist_x_head - - matrix_l_n[matrix_of_lines_ch.shape[0]:,5]=y_min_main_head-3-8 - matrix_l_n[matrix_of_lines_ch.shape[0]:,6]=y_min_main_head-5-8 - matrix_l_n[matrix_of_lines_ch.shape[0]:,7]=y_max_main_head#y_min_main_head+1-8 - matrix_l_n[matrix_of_lines_ch.shape[0]:,8]=4 - - matrix_of_lines_ch=np.copy(matrix_l_n) - - - cy_main_splitters=cy_main_hor[ (x_min_main_hor<=.16*region_pre_p.shape[1]) & (x_max_main_hor>=.84*region_pre_p.shape[1] )] - - cy_main_splitters=np.array( list(cy_main_splitters)+list(special_separators)) - + slope_lines_head, dist_x_head, x_min_main_head, x_max_main_head, cy_main_head, slope_lines_org_head, y_min_main_head, y_max_main_head, cx_main_head = find_features_of_lines( + contours_h) + matrix_l_n = np.zeros((matrix_of_lines_ch.shape[0] + len(cy_main_head), matrix_of_lines_ch.shape[1])) + matrix_l_n[:matrix_of_lines_ch.shape[0], :] = np.copy(matrix_of_lines_ch[:, :]) + args_head = np.array(range(len(cy_main_head))) + len(cy_main_hor) + + matrix_l_n[matrix_of_lines_ch.shape[0]:, 0] = args_head + matrix_l_n[matrix_of_lines_ch.shape[0]:, 2] = x_min_main_head + 30 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 3] = x_max_main_head - 30 + + matrix_l_n[matrix_of_lines_ch.shape[0]:, 4] = dist_x_head + + matrix_l_n[matrix_of_lines_ch.shape[0]:, 5] = y_min_main_head - 3 - 8 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 6] = y_min_main_head - 5 - 8 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 7] = y_max_main_head # y_min_main_head+1-8 + matrix_l_n[matrix_of_lines_ch.shape[0]:, 8] 
= 4 + + matrix_of_lines_ch = np.copy(matrix_l_n) + + cy_main_splitters = cy_main_hor[ + (x_min_main_hor <= .16 * region_pre_p.shape[1]) & (x_max_main_hor >= .84 * region_pre_p.shape[1])] + + cy_main_splitters = np.array(list(cy_main_splitters) + list(special_separators)) + if contours_h is not None: try: - cy_main_splitters_head=cy_main_head[ (x_min_main_head<=.16*region_pre_p.shape[1]) & (x_max_main_head>=.84*region_pre_p.shape[1] )] - cy_main_splitters=np.array( list(cy_main_splitters)+list(cy_main_splitters_head)) + cy_main_splitters_head = cy_main_head[ + (x_min_main_head <= .16 * region_pre_p.shape[1]) & (x_max_main_head >= .84 * region_pre_p.shape[1])] + cy_main_splitters = np.array(list(cy_main_splitters) + list(cy_main_splitters_head)) except: pass - args_cy_splitter=np.argsort(cy_main_splitters) - - cy_main_splitters_sort=cy_main_splitters[args_cy_splitter] - - splitter_y_new=[] + args_cy_splitter = np.argsort(cy_main_splitters) + + cy_main_splitters_sort = cy_main_splitters[args_cy_splitter] + + splitter_y_new = [] splitter_y_new.append(0) for i in range(len(cy_main_splitters_sort)): - splitter_y_new.append( cy_main_splitters_sort[i] ) - + splitter_y_new.append(cy_main_splitters_sort[i]) + splitter_y_new.append(region_pre_p.shape[0]) - - splitter_y_new_diff=np.diff(splitter_y_new)/float(region_pre_p.shape[0])*100 - - args_big_parts=np.array(range(len(splitter_y_new_diff))) [ splitter_y_new_diff>22 ] - - - - regions_without_separators=return_regions_without_separators(region_pre_p) - - - length_y_threshold=regions_without_separators.shape[0]/4.0 - - num_col_fin=0 - peaks_neg_fin_fin=[] - + + splitter_y_new_diff = np.diff(splitter_y_new) / float(region_pre_p.shape[0]) * 100 + + args_big_parts = np.array(range(len(splitter_y_new_diff)))[splitter_y_new_diff > 22] + + regions_without_separators = return_regions_without_separators(region_pre_p) + + length_y_threshold = regions_without_separators.shape[0] / 4.0 + + num_col_fin = 0 + peaks_neg_fin_fin = [] + for itiles in args_big_parts: - - - regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:,0] - #image_page_background_zero_tile=image_page_background_zero[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:] - - #print(regions_without_separators_tile.shape) - ##plt.imshow(regions_without_separators_tile) - ##plt.show() - - #num_col, peaks_neg_fin=self.find_num_col(regions_without_separators_tile,multiplier=6.0) - - #regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3) + + regions_without_separators_tile = regions_without_separators[ + int(splitter_y_new[itiles]):int(splitter_y_new[itiles + 1]), :, 0] + # image_page_background_zero_tile=image_page_background_zero[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:] + + # print(regions_without_separators_tile.shape) + # plt.imshow(regions_without_separators_tile) + # plt.show() + + # num_col, peaks_neg_fin=self.find_num_col(regions_without_separators_tile,multiplier=6.0) + + # regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3) # try: - num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0) + num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, + multiplier=7.0) except: num_col = 0 peaks_neg_fin = [] - - if num_col>num_col_fin: - num_col_fin=num_col - peaks_neg_fin_fin=peaks_neg_fin - - - if len(args_big_parts)==1 
and (len(peaks_neg_fin_fin)+1)<num_col_classifier:
-        peaks_neg_fin=find_num_col_by_vertical_lines(vertical)
-        peaks_neg_fin=peaks_neg_fin[peaks_neg_fin>=500]
-        peaks_neg_fin=peaks_neg_fin[peaks_neg_fin<=(vertical.shape[1]-500)]
-        peaks_neg_fin_fin=peaks_neg_fin[:]
-
-        #print(peaks_neg_fin_fin,'peaks_neg_fin_fintaza')
-
-    return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n
-
-
-def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, tables, right2left_readingorder):
+
+    if num_col > num_col_fin:
+        num_col_fin = num_col
+        peaks_neg_fin_fin = peaks_neg_fin
+
+    if len(args_big_parts) == 1 and (len(peaks_neg_fin_fin) + 1) < num_col_classifier:
+        peaks_neg_fin = find_num_col_by_vertical_lines(vertical)
+        peaks_neg_fin = peaks_neg_fin[peaks_neg_fin >= 500]
+        peaks_neg_fin = peaks_neg_fin[peaks_neg_fin <= (vertical.shape[1] - 500)]
+        peaks_neg_fin_fin = peaks_neg_fin[:]
+
+    # print(peaks_neg_fin_fin,'peaks_neg_fin_fintaza')
+
+    return num_col_fin, peaks_neg_fin_fin, matrix_of_lines_ch, splitter_y_new, separators_closeup_n
+
+
+def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch,
+                                                   num_col_classifier, erosion_hurts, tables, right2left_readingorder):
     if right2left_readingorder:
-        regions_without_separators = cv2.flip(regions_without_separators,1)
-    boxes=[]
+        regions_without_separators = cv2.flip(regions_without_separators, 1)
+    boxes = []
     peaks_neg_tot_tables = []
-    for i in range(len(splitter_y_new)-1):
-        #print(splitter_y_new[i],splitter_y_new[i+1])
-        matrix_new=matrix_of_lines_ch[:,:][ (matrix_of_lines_ch[:,6]> splitter_y_new[i] ) & (matrix_of_lines_ch[:,7]< splitter_y_new[i+1] ) ]
-        #print(len( matrix_new[:,9][matrix_new[:,9]==1] ))
-
-        #print(matrix_new[:,8][matrix_new[:,9]==1],'gaddaaa')
-
+    for i in range(len(splitter_y_new) - 1):
+        # print(splitter_y_new[i],splitter_y_new[i+1])
+        matrix_new = matrix_of_lines_ch[:, :][
+            (matrix_of_lines_ch[:, 6] > splitter_y_new[i]) & (matrix_of_lines_ch[:, 7] < splitter_y_new[i + 1])]
+        # print(len( matrix_new[:,9][matrix_new[:,9]==1] ))
+
+        # print(matrix_new[:,8][matrix_new[:,9]==1],'gaddaaa')
+
         # check to see is there any vertical separator to find holes.
-        if 1>0:#len( matrix_new[:,9][matrix_new[:,9]==1] )>0 and np.max(matrix_new[:,8][matrix_new[:,9]==1])>=0.1*(np.abs(splitter_y_new[i+1]-splitter_y_new[i] )):
-
+        if 1 > 0:  # len( matrix_new[:,9][matrix_new[:,9]==1] )>0 and np.max(matrix_new[:,8][matrix_new[:,9]==1])>=0.1*(np.abs(splitter_y_new[i+1]-splitter_y_new[i] )):
+
             try:
                 if erosion_hurts:
-                    num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:], num_col_classifier, tables, multiplier=6.)
+                    num_col, peaks_neg_fin = find_num_col(
+                        regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i + 1]), :],
+                        num_col_classifier, tables, multiplier=6.)
                 else:
-                    num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],num_col_classifier, tables, multiplier=7.)
+                    num_col, peaks_neg_fin = find_num_col(
+                        regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i + 1]), :],
+                        num_col_classifier, tables, multiplier=7.)
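+                # if find_num_col cannot detect projection peaks in this page
+                # strip, the bare except below falls back to zero columns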
            except:
-                peaks_neg_fin=[]
+                peaks_neg_fin = []
                 num_col = 0
-
             try:
-                peaks_neg_fin_org=np.copy(peaks_neg_fin)
-                if (len(peaks_neg_fin)+1)<num_col_classifier:
-                if len(peaks_neg_fin1)>=len(peaks_neg_fin2):
-                    peaks_neg_fin=list(np.copy(peaks_neg_fin1))
+                peaks_neg_fin2 = []
+
+                if len(peaks_neg_fin1) >= len(peaks_neg_fin2):
+                    peaks_neg_fin = list(np.copy(peaks_neg_fin1))
                 else:
-                    peaks_neg_fin=list(np.copy(peaks_neg_fin2))
-
-                peaks_neg_fin=list(np.array(peaks_neg_fin)+peaks_neg_fin_early[i_n])
-
-                if i_n!=(len(peaks_neg_fin_early)-2):
-                    peaks_neg_fin_rev.append(peaks_neg_fin_early[i_n+1])
-                #print(peaks_neg_fin,'peaks_neg_fin')
-                peaks_neg_fin_rev=peaks_neg_fin_rev+peaks_neg_fin
-
-                if len(peaks_neg_fin_rev)>=len(peaks_neg_fin_org):
-                    peaks_neg_fin=list(np.sort(peaks_neg_fin_rev))
-                    num_col=len(peaks_neg_fin)
+                    peaks_neg_fin = list(np.copy(peaks_neg_fin2))
+
+                peaks_neg_fin = list(np.array(peaks_neg_fin) + peaks_neg_fin_early[i_n])
+
+                if i_n != (len(peaks_neg_fin_early) - 2):
+                    peaks_neg_fin_rev.append(peaks_neg_fin_early[i_n + 1])
+                # print(peaks_neg_fin,'peaks_neg_fin')
+                peaks_neg_fin_rev = peaks_neg_fin_rev + peaks_neg_fin
+
+                if len(peaks_neg_fin_rev) >= len(peaks_neg_fin_org):
+                    peaks_neg_fin = list(np.sort(peaks_neg_fin_rev))
+                    num_col = len(peaks_neg_fin)
                 else:
-                    peaks_neg_fin=list(np.copy(peaks_neg_fin_org))
-                    num_col=len(peaks_neg_fin)
-
-                #print(peaks_neg_fin,'peaks_neg_fin')
+                    peaks_neg_fin = list(np.copy(peaks_neg_fin_org))
+                    num_col = len(peaks_neg_fin)
+
+                # print(peaks_neg_fin,'peaks_neg_fin')
             except:
                 pass
-        #num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.0)
-        x_min_hor_some=matrix_new[:,2][ (matrix_new[:,9]==0) ]
-        x_max_hor_some=matrix_new[:,3][ (matrix_new[:,9]==0) ]
-        cy_hor_some=matrix_new[:,5][ (matrix_new[:,9]==0) ]
-        cy_hor_diff=matrix_new[:,7][ (matrix_new[:,9]==0) ]
-        arg_org_hor_some=matrix_new[:,0][ (matrix_new[:,9]==0) ]
-
+            # num_col, peaks_neg_fin=find_num_col(regions_without_separators[int(splitter_y_new[i]):int(splitter_y_new[i+1]),:],multiplier=7.0)
+            x_min_hor_some = matrix_new[:, 2][(matrix_new[:, 9] == 0)]
+            x_max_hor_some = matrix_new[:, 3][(matrix_new[:, 9] == 0)]
+            cy_hor_some = matrix_new[:, 5][(matrix_new[:, 9] == 0)]
+            cy_hor_diff = matrix_new[:, 7][(matrix_new[:, 9] == 0)]
+            arg_org_hor_some = matrix_new[:, 0][(matrix_new[:, 9] == 0)]
+
             if right2left_readingorder:
                 x_max_hor_some_new = regions_without_separators.shape[1] - x_min_hor_some
                 x_min_hor_some_new = regions_without_separators.shape[1] - x_max_hor_some
-
-                x_min_hor_some =list(np.copy(x_min_hor_some_new))
-                x_max_hor_some =list(np.copy(x_max_hor_some_new))
-
-            peaks_neg_tot=return_points_with_boundies(peaks_neg_fin,0, regions_without_separators[:,:].shape[1])
-
+
+                x_min_hor_some = list(np.copy(x_min_hor_some_new))
+                x_max_hor_some = list(np.copy(x_max_hor_some_new))
+
+            peaks_neg_tot = return_points_with_boundies(peaks_neg_fin, 0, regions_without_separators[:, :].shape[1])
+
             peaks_neg_tot_tables.append(peaks_neg_tot)
-
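+            # derive the reading-order type and the separator spans that lack a
+            # "mother" (enclosing) span, or that have children but no mother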
+            reading_order_type, x_starting, x_ending, y_type_2, y_diff_type_2, y_lines_without_mother, x_start_without_mother, x_end_without_mother, there_is_sep_with_child, y_lines_with_child_without_mother, x_start_with_child_without_mother, x_end_with_child_without_mother, new_main_sep_y = return_x_start_end_mothers_childs_and_type_of_reading_order(
+                x_min_hor_some, x_max_hor_some, cy_hor_some, peaks_neg_tot, cy_hor_diff)
+
+            if (reading_order_type == 1) or (
+                    reading_order_type == 0 and (len(y_lines_without_mother) >= 2 or there_is_sep_with_child == 1)):
-                try:
+                try:
+                    y_grenze = int(splitter_y_new[i]) + 300
+
+                    # check if there is a big separator in this y_mains_sep_ohne_grenzen
+
+                    args_early_ys = np.array(range(len(y_type_2)))
+
+                    # print(args_early_ys,'args_early_ys')
+                    # print(int(splitter_y_new[i]),int(splitter_y_new[i+1]))
+
+                    y_type_2_up = np.array(y_type_2)[
+                        (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <=
y_grenze)] + x_starting_up = np.array(x_starting)[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + x_ending_up = np.array(x_ending)[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + y_diff_type_2_up = np.array(y_diff_type_2)[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + args_up = args_early_ys[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + + if len(y_type_2_up) > 0: + y_main_separator_up = y_type_2_up[ + (x_starting_up == 0) & (x_ending_up == (len(peaks_neg_tot) - 1))] + y_diff_main_separator_up = y_diff_type_2_up[ + (x_starting_up == 0) & (x_ending_up == (len(peaks_neg_tot) - 1))] + args_main_to_deleted = args_up[(x_starting_up == 0) & (x_ending_up == (len(peaks_neg_tot) - 1))] + # print(y_main_separator_up,y_diff_main_separator_up,args_main_to_deleted,'fffffjammmm') + + if len(y_diff_main_separator_up) > 0: + args_to_be_kept = np.array(list(set(args_early_ys) - set(args_main_to_deleted))) + # print(args_to_be_kept,'args_to_be_kept') + boxes.append([0, peaks_neg_tot[len(peaks_neg_tot) - 1], int(splitter_y_new[i]), + int(np.max(y_diff_main_separator_up))]) + splitter_y_new[i] = [np.max(y_diff_main_separator_up)][0] + + # print(splitter_y_new[i],'splitter_y_new[i]') + y_type_2 = np.array(y_type_2)[args_to_be_kept] + x_starting = np.array(x_starting)[args_to_be_kept] + x_ending = np.array(x_ending)[args_to_be_kept] + y_diff_type_2 = np.array(y_diff_type_2)[args_to_be_kept] + + # print('galdiha') + y_grenze = int(splitter_y_new[i]) + 200 + + args_early_ys2 = np.array(range(len(y_type_2))) + y_type_2_up = np.array(y_type_2)[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + x_starting_up = np.array(x_starting)[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + x_ending_up = np.array(x_ending)[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + y_diff_type_2_up = np.array(y_diff_type_2)[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + args_up2 = args_early_ys2[ + (np.array(y_type_2) > int(splitter_y_new[i])) & (np.array(y_type_2) <= y_grenze)] + + # print(y_type_2_up,x_starting_up,x_ending_up,'didid') + + nodes_in = [] for ij in range(len(x_starting_up)): - nodes_in=nodes_in+list(np.array(range(x_starting_up[ij],x_ending_up[ij]))) - - #print(np.unique(nodes_in),'nodes_in') - - if set(np.unique(nodes_in))==set(np.array(range(len(peaks_neg_tot)-1)) ): + nodes_in = nodes_in + list(np.array(range(x_starting_up[ij], x_ending_up[ij]))) + + # print(np.unique(nodes_in),'nodes_in') + + if set(np.unique(nodes_in)) == set(np.array(range(len(peaks_neg_tot) - 1))): pass - elif set( np.unique(nodes_in) )==set( np.array(range(1,len(peaks_neg_tot)-1)) ): + elif set(np.unique(nodes_in)) == set(np.array(range(1, len(peaks_neg_tot) - 1))): pass else: - #print('burdaydikh') - args_to_be_kept2=np.array( list( set(args_early_ys2)-set(args_up2) ) ) - - if len(args_to_be_kept2)>0: - y_type_2=np.array(y_type_2)[args_to_be_kept2] - x_starting=np.array(x_starting)[args_to_be_kept2] - x_ending=np.array(x_ending)[args_to_be_kept2] - y_diff_type_2=np.array(y_diff_type_2)[args_to_be_kept2] + # print('burdaydikh') + args_to_be_kept2 = np.array(list(set(args_early_ys2) - set(args_up2))) + + if len(args_to_be_kept2) > 0: + y_type_2 = np.array(y_type_2)[args_to_be_kept2] + x_starting = np.array(x_starting)[args_to_be_kept2] + 
x_ending = np.array(x_ending)[args_to_be_kept2] + y_diff_type_2 = np.array(y_diff_type_2)[args_to_be_kept2] else: pass - - #print('burdaydikh2') - - - - elif len(y_diff_main_separator_up)==0: - nodes_in=[] + + # print('burdaydikh2') + + + + elif len(y_diff_main_separator_up) == 0: + nodes_in = [] for ij in range(len(x_starting_up)): - nodes_in=nodes_in+list(np.array(range(x_starting_up[ij],x_ending_up[ij]))) - - #print(np.unique(nodes_in),'nodes_in2') - #print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))') - - - - if set(np.unique(nodes_in))==set(np.array(range(len(peaks_neg_tot)-1)) ): + nodes_in = nodes_in + list(np.array(range(x_starting_up[ij], x_ending_up[ij]))) + + # print(np.unique(nodes_in),'nodes_in2') + # print(np.array(range(len(peaks_neg_tot)-1)),'np.array(range(len(peaks_neg_tot)-1))') + + if set(np.unique(nodes_in)) == set(np.array(range(len(peaks_neg_tot) - 1))): pass - elif set(np.unique(nodes_in) )==set( np.array(range(1,len(peaks_neg_tot)-1)) ): + elif set(np.unique(nodes_in)) == set(np.array(range(1, len(peaks_neg_tot) - 1))): pass else: - #print('burdaydikh') - #print(args_early_ys,'args_early_ys') - #print(args_up,'args_up') - args_to_be_kept2=np.array( list( set(args_early_ys)-set(args_up) ) ) - - #print(args_to_be_kept2,'args_to_be_kept2') - - #print(len(y_type_2),len(x_starting),len(x_ending),len(y_diff_type_2)) - - if len(args_to_be_kept2)>0: - y_type_2=np.array(y_type_2)[args_to_be_kept2] - x_starting=np.array(x_starting)[args_to_be_kept2] - x_ending=np.array(x_ending)[args_to_be_kept2] - y_diff_type_2=np.array(y_diff_type_2)[args_to_be_kept2] + # print('burdaydikh') + # print(args_early_ys,'args_early_ys') + # print(args_up,'args_up') + args_to_be_kept2 = np.array(list(set(args_early_ys) - set(args_up))) + + # print(args_to_be_kept2,'args_to_be_kept2') + + # print(len(y_type_2),len(x_starting),len(x_ending),len(y_diff_type_2)) + + if len(args_to_be_kept2) > 0: + y_type_2 = np.array(y_type_2)[args_to_be_kept2] + x_starting = np.array(x_starting)[args_to_be_kept2] + x_ending = np.array(x_ending)[args_to_be_kept2] + y_diff_type_2 = np.array(y_diff_type_2)[args_to_be_kept2] else: pass - - #print('burdaydikh2') - - - - - - - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - y_type_2=np.array(y_type_2) - y_diff_type_2_up=np.array(y_diff_type_2_up) - - #int(splitter_y_new[i]) - - y_lines_by_order=[] - x_start_by_order=[] - x_end_by_order=[] - - if (len(x_end_with_child_without_mother)==0 and reading_order_type==0) or reading_order_type==1: - - - if reading_order_type==1: + + # print('burdaydikh2') + + x_starting = np.array(x_starting) + x_ending = np.array(x_ending) + y_type_2 = np.array(y_type_2) + y_diff_type_2_up = np.array(y_diff_type_2_up) + + # int(splitter_y_new[i]) + + y_lines_by_order = [] + x_start_by_order = [] + x_end_by_order = [] + + if ( + len(x_end_with_child_without_mother) == 0 and reading_order_type == 0) or reading_order_type == 1: + + if reading_order_type == 1: y_lines_by_order.append(int(splitter_y_new[i])) x_start_by_order.append(0) - x_end_by_order.append(len(peaks_neg_tot)-2) + x_end_by_order.append(len(peaks_neg_tot) - 2) else: - #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - - columns_covered_by_mothers=[] - + # print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') + + columns_covered_by_mothers = [] + for dj in range(len(x_start_without_mother)): - 
columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_start_without_mother[dj],x_end_without_mother[dj])) ) - columns_covered_by_mothers=list(set(columns_covered_by_mothers)) - - all_columns=np.array(range(len(peaks_neg_tot)-1)) - - columns_not_covered=list( set(all_columns)-set(columns_covered_by_mothers) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - + columns_covered_by_mothers = columns_covered_by_mothers + list( + np.array(range(x_start_without_mother[dj], x_end_without_mother[dj]))) + columns_covered_by_mothers = list(set(columns_covered_by_mothers)) + + all_columns = np.array(range(len(peaks_neg_tot) - 1)) + + columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) + + y_type_2 = list(y_type_2) + x_starting = list(x_starting) + x_ending = list(x_ending) + for lj in columns_not_covered: y_type_2.append(int(splitter_y_new[i])) x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) + x_ending.append(lj + 1) + # y_lines_by_order.append(int(splitter_y_new[i])) + # x_start_by_order.append(0) for lk in range(len(x_start_without_mother)): y_type_2.append(int(splitter_y_new[i])) x_starting.append(x_start_without_mother[lk]) x_ending.append(x_end_without_mother[lk]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - - - - - ind_args=np.array(range(len(y_type_2))) - #ind_args=np.array(ind_args) - #print(ind_args,'ind_args') - for column in range(len(peaks_neg_tot)-1): - #print(column,'column') - ind_args_in_col=ind_args[x_starting==column] - #print('babali2') - #print(ind_args_in_col,'ind_args_in_col') - ind_args_in_col=np.array(ind_args_in_col) - #print(len(y_type_2)) - y_column=y_type_2[ind_args_in_col] - x_start_column=x_starting[ind_args_in_col] - x_end_column=x_ending[ind_args_in_col] - #print('babali3') - ind_args_col_sorted=np.argsort(y_column) - y_col_sort=y_column[ind_args_col_sorted] - x_start_column_sort=x_start_column[ind_args_col_sorted] - x_end_column_sort=x_end_column[ind_args_col_sorted] - #print('babali4') + + y_type_2 = np.array(y_type_2) + x_starting = np.array(x_starting) + x_ending = np.array(x_ending) + + ind_args = np.array(range(len(y_type_2))) + # ind_args=np.array(ind_args) + # print(ind_args,'ind_args') + for column in range(len(peaks_neg_tot) - 1): + # print(column,'column') + ind_args_in_col = ind_args[x_starting == column] + # print('babali2') + # print(ind_args_in_col,'ind_args_in_col') + ind_args_in_col = np.array(ind_args_in_col) + # print(len(y_type_2)) + y_column = y_type_2[ind_args_in_col] + x_start_column = x_starting[ind_args_in_col] + x_end_column = x_ending[ind_args_in_col] + # print('babali3') + ind_args_col_sorted = np.argsort(y_column) + y_col_sort = y_column[ind_args_col_sorted] + x_start_column_sort = x_start_column[ind_args_col_sorted] + x_end_column_sort = x_end_column[ind_args_col_sorted] + # print('babali4') for ii in range(len(y_col_sort)): - #print('babali5') + # print('babali5') y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) - x_end_by_order.append(x_end_column_sort[ii]-1) - + x_end_by_order.append(x_end_column_sort[ii] - 1) + else: - #print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') - - columns_covered_by_mothers=[] - + # print(x_start_without_mother,x_end_without_mother,peaks_neg_tot,'dodo') + + columns_covered_by_mothers = [] + for dj in 
range(len(x_start_without_mother)): - columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_start_without_mother[dj],x_end_without_mother[dj])) ) - columns_covered_by_mothers=list(set(columns_covered_by_mothers)) - - all_columns=np.array(range(len(peaks_neg_tot)-1)) - - columns_not_covered=list( set(all_columns)-set(columns_covered_by_mothers) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - + columns_covered_by_mothers = columns_covered_by_mothers + list( + np.array(range(x_start_without_mother[dj], x_end_without_mother[dj]))) + columns_covered_by_mothers = list(set(columns_covered_by_mothers)) + + all_columns = np.array(range(len(peaks_neg_tot) - 1)) + + columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) + + y_type_2 = list(y_type_2) + x_starting = list(x_starting) + x_ending = list(x_ending) + for lj in columns_not_covered: y_type_2.append(int(splitter_y_new[i])) x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) + x_ending.append(lj + 1) + # y_lines_by_order.append(int(splitter_y_new[i])) + # x_start_by_order.append(0) for lk in range(len(x_start_without_mother)): y_type_2.append(int(splitter_y_new[i])) x_starting.append(x_start_without_mother[lk]) x_ending.append(x_end_without_mother[lk]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - - columns_covered_by_with_child_no_mothers=[] - + + y_type_2 = np.array(y_type_2) + x_starting = np.array(x_starting) + x_ending = np.array(x_ending) + + columns_covered_by_with_child_no_mothers = [] + for dj in range(len(x_end_with_child_without_mother)): - columns_covered_by_with_child_no_mothers=columns_covered_by_with_child_no_mothers+list(np.array(range(x_start_with_child_without_mother[dj],x_end_with_child_without_mother[dj])) ) - columns_covered_by_with_child_no_mothers=list(set(columns_covered_by_with_child_no_mothers)) - - all_columns=np.array(range(len(peaks_neg_tot)-1)) - - columns_not_covered_child_no_mother=list( set(all_columns)-set(columns_covered_by_with_child_no_mothers) ) - #indexes_to_be_spanned=[] - for i_s in range( len(x_end_with_child_without_mother) ): + columns_covered_by_with_child_no_mothers = columns_covered_by_with_child_no_mothers + list( + np.array( + range(x_start_with_child_without_mother[dj], x_end_with_child_without_mother[dj]))) + columns_covered_by_with_child_no_mothers = list(set(columns_covered_by_with_child_no_mothers)) + + all_columns = np.array(range(len(peaks_neg_tot) - 1)) + + columns_not_covered_child_no_mother = list( + set(all_columns) - set(columns_covered_by_with_child_no_mothers)) + # indexes_to_be_spanned=[] + for i_s in range(len(x_end_with_child_without_mother)): columns_not_covered_child_no_mother.append(x_start_with_child_without_mother[i_s]) - - - - columns_not_covered_child_no_mother=np.sort(columns_not_covered_child_no_mother) - - - - ind_args=np.array(range(len(y_type_2))) - - - + + columns_not_covered_child_no_mother = np.sort(columns_not_covered_child_no_mother) + + ind_args = np.array(range(len(y_type_2))) + for i_s_nc in columns_not_covered_child_no_mother: if i_s_nc in x_start_with_child_without_mother: - x_end_biggest_column=np.array(x_end_with_child_without_mother)[np.array(x_start_with_child_without_mother)==i_s_nc][0] - args_all_biggest_lines=ind_args[(x_starting==i_s_nc) & (x_ending==x_end_biggest_column)] - - args_all_biggest_lines=np.array(args_all_biggest_lines) - 
y_column_nc=y_type_2[args_all_biggest_lines]
-                            x_start_column_nc=x_starting[args_all_biggest_lines]
-                            x_end_column_nc=x_ending[args_all_biggest_lines]
-
-                            y_column_nc=np.sort(y_column_nc)
-
+                        x_end_biggest_column = np.array(x_end_with_child_without_mother)[
+                            np.array(x_start_with_child_without_mother) == i_s_nc][0]
+                        args_all_biggest_lines = ind_args[
+                            (x_starting == i_s_nc) & (x_ending == x_end_biggest_column)]
+
+                        args_all_biggest_lines = np.array(args_all_biggest_lines)
+                        y_column_nc = y_type_2[args_all_biggest_lines]
+                        x_start_column_nc = x_starting[args_all_biggest_lines]
+                        x_end_column_nc = x_ending[args_all_biggest_lines]
+
+                        y_column_nc = np.sort(y_column_nc)
+
                         for i_c in range(len(y_column_nc)):
-                            if i_c==(len(y_column_nc)-1):
-                                ind_all_lines_betweeen_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & (y_type_2<int(splitter_y_new[i+1])) & (x_starting>=i_s_nc) & (x_ending<=x_end_biggest_column)]
                             else:
-                                ind_all_lines_betweeen_nm_wc=ind_args[(y_type_2>y_column_nc[i_c]) & (y_type_2<y_column_nc[i_c+1]) & (x_starting>=i_s_nc) & (x_ending<=x_end_biggest_column)]
-
-                            y_all_between_nm_wc=y_type_2[ind_all_lines_betweeen_nm_wc]
-                            x_starting_all_between_nm_wc=x_starting[ind_all_lines_betweeen_nm_wc]
-                            x_ending_all_between_nm_wc=x_ending[ind_all_lines_betweeen_nm_wc]
-
-                            x_diff_all_between_nm_wc=x_ending_all_between_nm_wc-x_starting_all_between_nm_wc
-
-                            if len(x_diff_all_between_nm_wc)>0:
-                                biggest=np.argmax(x_diff_all_between_nm_wc)
-
-                            columns_covered_by_mothers=[]
-
+                            if i_c == (len(y_column_nc) - 1):
+                                ind_all_lines_betweeen_nm_wc = ind_args[
+                                    (y_type_2 > y_column_nc[i_c]) & (y_type_2 < int(splitter_y_new[i + 1])) & (
+                                            x_starting >= i_s_nc) & (x_ending <= x_end_biggest_column)]
                             else:
+                                ind_all_lines_betweeen_nm_wc = ind_args[
+                                    (y_type_2 > y_column_nc[i_c]) & (y_type_2 < y_column_nc[i_c + 1]) & (
+                                            x_starting >= i_s_nc) & (x_ending <= x_end_biggest_column)]
+
+                            y_all_between_nm_wc = y_type_2[ind_all_lines_betweeen_nm_wc]
+                            x_starting_all_between_nm_wc = x_starting[ind_all_lines_betweeen_nm_wc]
+                            x_ending_all_between_nm_wc = x_ending[ind_all_lines_betweeen_nm_wc]
+
+                            x_diff_all_between_nm_wc = x_ending_all_between_nm_wc - x_starting_all_between_nm_wc
+
+                            if len(x_diff_all_between_nm_wc) > 0:
+                                biggest = np.argmax(x_diff_all_between_nm_wc)
+
+                                columns_covered_by_mothers = []
+
                             for dj in range(len(x_starting_all_between_nm_wc)):
-                                columns_covered_by_mothers=columns_covered_by_mothers+list(np.array(range(x_starting_all_between_nm_wc[dj],x_ending_all_between_nm_wc[dj])) )
-                            columns_covered_by_mothers=list(set(columns_covered_by_mothers))
-
-                            all_columns=np.array(range(i_s_nc,x_end_biggest_column))
-
-                            columns_not_covered=list( set(all_columns)-set(columns_covered_by_mothers) )
-
-                            should_longest_line_be_extended=0
-                            if len(x_diff_all_between_nm_wc)>0 and set( list( np.array(range(x_starting_all_between_nm_wc[biggest],x_ending_all_between_nm_wc[biggest])) )+list(columns_not_covered) ) !=set(all_columns):
-                                should_longest_line_be_extended=1
-
-                                index_lines_so_close_to_top_separator=np.array(range(len(y_all_between_nm_wc)))[(y_all_between_nm_wc>y_column_nc[i_c]) & (y_all_between_nm_wc<=(y_column_nc[i_c]+500))]
-
-                                if len(index_lines_so_close_to_top_separator)>0:
-                                    indexes_remained_after_deleting_closed_lines= np.array( list ( set( list( np.array(range(len(y_all_between_nm_wc))) ) ) -set(list( index_lines_so_close_to_top_separator) ) ) )
-
-                                    if len(indexes_remained_after_deleting_closed_lines)>0:
-                                        y_all_between_nm_wc=y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
-
x_starting_all_between_nm_wc=x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - x_ending_all_between_nm_wc=x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines] - - - y_all_between_nm_wc=list(y_all_between_nm_wc) - x_starting_all_between_nm_wc=list(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=list(x_ending_all_between_nm_wc) - - - y_all_between_nm_wc.append(y_column_nc[i_c] ) + columns_covered_by_mothers = columns_covered_by_mothers + list(np.array( + range(x_starting_all_between_nm_wc[dj], x_ending_all_between_nm_wc[dj]))) + columns_covered_by_mothers = list(set(columns_covered_by_mothers)) + + all_columns = np.array(range(i_s_nc, x_end_biggest_column)) + + columns_not_covered = list(set(all_columns) - set(columns_covered_by_mothers)) + + should_longest_line_be_extended = 0 + if len(x_diff_all_between_nm_wc) > 0 and set(list(np.array( + range(x_starting_all_between_nm_wc[biggest], + x_ending_all_between_nm_wc[biggest]))) + list( + columns_not_covered)) != set(all_columns): + should_longest_line_be_extended = 1 + + index_lines_so_close_to_top_separator = \ + np.array(range(len(y_all_between_nm_wc)))[ + (y_all_between_nm_wc > y_column_nc[i_c]) & ( + y_all_between_nm_wc <= (y_column_nc[i_c] + 500))] + + if len(index_lines_so_close_to_top_separator) > 0: + indexes_remained_after_deleting_closed_lines = np.array(list( + set(list(np.array(range(len(y_all_between_nm_wc))))) - set( + list(index_lines_so_close_to_top_separator)))) + + if len(indexes_remained_after_deleting_closed_lines) > 0: + y_all_between_nm_wc = y_all_between_nm_wc[ + indexes_remained_after_deleting_closed_lines] + x_starting_all_between_nm_wc = x_starting_all_between_nm_wc[ + indexes_remained_after_deleting_closed_lines] + x_ending_all_between_nm_wc = x_ending_all_between_nm_wc[ + indexes_remained_after_deleting_closed_lines] + + y_all_between_nm_wc = list(y_all_between_nm_wc) + x_starting_all_between_nm_wc = list(x_starting_all_between_nm_wc) + x_ending_all_between_nm_wc = list(x_ending_all_between_nm_wc) + + y_all_between_nm_wc.append(y_column_nc[i_c]) x_starting_all_between_nm_wc.append(i_s_nc) x_ending_all_between_nm_wc.append(x_end_biggest_column) - - - - - y_all_between_nm_wc=list(y_all_between_nm_wc) - x_starting_all_between_nm_wc=list(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=list(x_ending_all_between_nm_wc) - - if len(x_diff_all_between_nm_wc)>0: + + y_all_between_nm_wc = list(y_all_between_nm_wc) + x_starting_all_between_nm_wc = list(x_starting_all_between_nm_wc) + x_ending_all_between_nm_wc = list(x_ending_all_between_nm_wc) + + if len(x_diff_all_between_nm_wc) > 0: try: x_starting_all_between_nm_wc.append(x_starting_all_between_nm_wc[biggest]) x_ending_all_between_nm_wc.append(x_ending_all_between_nm_wc[biggest]) @@ -2121,240 +2126,219 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho except: pass - - for c_n_c in columns_not_covered: y_all_between_nm_wc.append(y_column_nc[i_c]) x_starting_all_between_nm_wc.append(c_n_c) - x_ending_all_between_nm_wc.append(c_n_c+1) - - y_all_between_nm_wc=np.array(y_all_between_nm_wc) - x_starting_all_between_nm_wc=np.array(x_starting_all_between_nm_wc) - x_ending_all_between_nm_wc=np.array(x_ending_all_between_nm_wc) - - ind_args_between=np.array(range(len(x_ending_all_between_nm_wc))) - - for column in range(i_s_nc,x_end_biggest_column): - ind_args_in_col=ind_args_between[x_starting_all_between_nm_wc==column] - #print('babali2') - #print(ind_args_in_col,'ind_args_in_col') 
- ind_args_in_col=np.array(ind_args_in_col) - #print(len(y_type_2)) - y_column=y_all_between_nm_wc[ind_args_in_col] - x_start_column=x_starting_all_between_nm_wc[ind_args_in_col] - x_end_column=x_ending_all_between_nm_wc[ind_args_in_col] - #print('babali3') - ind_args_col_sorted=np.argsort(y_column) - y_col_sort=y_column[ind_args_col_sorted] - x_start_column_sort=x_start_column[ind_args_col_sorted] - x_end_column_sort=x_end_column[ind_args_col_sorted] - #print('babali4') + x_ending_all_between_nm_wc.append(c_n_c + 1) + + y_all_between_nm_wc = np.array(y_all_between_nm_wc) + x_starting_all_between_nm_wc = np.array(x_starting_all_between_nm_wc) + x_ending_all_between_nm_wc = np.array(x_ending_all_between_nm_wc) + + ind_args_between = np.array(range(len(x_ending_all_between_nm_wc))) + + for column in range(i_s_nc, x_end_biggest_column): + ind_args_in_col = ind_args_between[x_starting_all_between_nm_wc == column] + # print('babali2') + # print(ind_args_in_col,'ind_args_in_col') + ind_args_in_col = np.array(ind_args_in_col) + # print(len(y_type_2)) + y_column = y_all_between_nm_wc[ind_args_in_col] + x_start_column = x_starting_all_between_nm_wc[ind_args_in_col] + x_end_column = x_ending_all_between_nm_wc[ind_args_in_col] + # print('babali3') + ind_args_col_sorted = np.argsort(y_column) + y_col_sort = y_column[ind_args_col_sorted] + x_start_column_sort = x_start_column[ind_args_col_sorted] + x_end_column_sort = x_end_column[ind_args_col_sorted] + # print('babali4') for ii in range(len(y_col_sort)): - #print('babali5') + # print('babali5') y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) - x_end_by_order.append(x_end_column_sort[ii]-1) - - - - - - + x_end_by_order.append(x_end_column_sort[ii] - 1) + else: - - #print(column,'column') - ind_args_in_col=ind_args[x_starting==i_s_nc] - #print('babali2') - #print(ind_args_in_col,'ind_args_in_col') - ind_args_in_col=np.array(ind_args_in_col) - #print(len(y_type_2)) - y_column=y_type_2[ind_args_in_col] - x_start_column=x_starting[ind_args_in_col] - x_end_column=x_ending[ind_args_in_col] - #print('babali3') - ind_args_col_sorted=np.argsort(y_column) - y_col_sort=y_column[ind_args_col_sorted] - x_start_column_sort=x_start_column[ind_args_col_sorted] - x_end_column_sort=x_end_column[ind_args_col_sorted] - #print('babali4') + + # print(column,'column') + ind_args_in_col = ind_args[x_starting == i_s_nc] + # print('babali2') + # print(ind_args_in_col,'ind_args_in_col') + ind_args_in_col = np.array(ind_args_in_col) + # print(len(y_type_2)) + y_column = y_type_2[ind_args_in_col] + x_start_column = x_starting[ind_args_in_col] + x_end_column = x_ending[ind_args_in_col] + # print('babali3') + ind_args_col_sorted = np.argsort(y_column) + y_col_sort = y_column[ind_args_col_sorted] + x_start_column_sort = x_start_column[ind_args_col_sorted] + x_end_column_sort = x_end_column[ind_args_col_sorted] + # print('babali4') for ii in range(len(y_col_sort)): y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) - x_end_by_order.append(x_end_column_sort[ii]-1) + x_end_by_order.append(x_end_column_sort[ii] - 1) - - for il in range(len(y_lines_by_order)): - - - y_copy=list( np.copy(y_lines_by_order) ) - x_start_copy=list( np.copy(x_start_by_order) ) - x_end_copy=list ( np.copy(x_end_by_order) ) - - #print(y_copy,'y_copy') - y_itself=y_copy.pop(il) - x_start_itself=x_start_copy.pop(il) - x_end_itself=x_end_copy.pop(il) - - #print(y_copy,'y_copy2') - - for column in range(x_start_itself,x_end_itself+1): - 
#print(column,'cols') - y_in_cols=[] + + y_copy = list(np.copy(y_lines_by_order)) + x_start_copy = list(np.copy(x_start_by_order)) + x_end_copy = list(np.copy(x_end_by_order)) + + # print(y_copy,'y_copy') + y_itself = y_copy.pop(il) + x_start_itself = x_start_copy.pop(il) + x_end_itself = x_end_copy.pop(il) + + # print(y_copy,'y_copy2') + + for column in range(x_start_itself, x_end_itself + 1): + # print(column,'cols') + y_in_cols = [] for yic in range(len(y_copy)): - #print('burda') - if y_copy[yic]>y_itself and column>=x_start_copy[yic] and column<=x_end_copy[yic]: + # print('burda') + if y_copy[yic] > y_itself and column >= x_start_copy[yic] and column <= x_end_copy[yic]: y_in_cols.append(y_copy[yic]) - #print('burda2') - #print(y_in_cols,'y_in_cols') - if len(y_in_cols)>0: - y_down=np.min(y_in_cols) + # print('burda2') + # print(y_in_cols,'y_in_cols') + if len(y_in_cols) > 0: + y_down = np.min(y_in_cols) else: - y_down=[int(splitter_y_new[i+1])][0] - #print(y_itself,'y_itself') - boxes.append([peaks_neg_tot[column],peaks_neg_tot[column+1],y_itself,y_down]) + y_down = [int(splitter_y_new[i + 1])][0] + # print(y_itself,'y_itself') + boxes.append([peaks_neg_tot[column], peaks_neg_tot[column + 1], y_itself, y_down]) except: - boxes.append([0,peaks_neg_tot[len(peaks_neg_tot)-1],int(splitter_y_new[i]),int(splitter_y_new[i+1])]) - + boxes.append( + [0, peaks_neg_tot[len(peaks_neg_tot) - 1], int(splitter_y_new[i]), int(splitter_y_new[i + 1])]) - else: - y_lines_by_order=[] - x_start_by_order=[] - x_end_by_order=[] - if len(x_starting)>0: - all_columns = np.array(range(len(peaks_neg_tot)-1)) - columns_covered_by_lines_covered_more_than_2col=[] - + y_lines_by_order = [] + x_start_by_order = [] + x_end_by_order = [] + if len(x_starting) > 0: + all_columns = np.array(range(len(peaks_neg_tot) - 1)) + columns_covered_by_lines_covered_more_than_2col = [] + for dj in range(len(x_starting)): - if set( list(np.array(range(x_starting[dj],x_ending[dj])) ) ) == set(all_columns): + if set(list(np.array(range(x_starting[dj], x_ending[dj])))) == set(all_columns): pass else: - columns_covered_by_lines_covered_more_than_2col=columns_covered_by_lines_covered_more_than_2col+list(np.array(range(x_starting[dj],x_ending[dj])) ) - columns_covered_by_lines_covered_more_than_2col=list(set(columns_covered_by_lines_covered_more_than_2col)) - - - - columns_not_covered=list( set(all_columns)-set(columns_covered_by_lines_covered_more_than_2col) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - + columns_covered_by_lines_covered_more_than_2col = columns_covered_by_lines_covered_more_than_2col + list( + np.array(range(x_starting[dj], x_ending[dj]))) + columns_covered_by_lines_covered_more_than_2col = list( + set(columns_covered_by_lines_covered_more_than_2col)) + + columns_not_covered = list(set(all_columns) - set(columns_covered_by_lines_covered_more_than_2col)) + + y_type_2 = list(y_type_2) + x_starting = list(x_starting) + x_ending = list(x_ending) + for lj in columns_not_covered: y_type_2.append(int(splitter_y_new[i])) x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - - #y_type_2.append(int(splitter_y_new[i])) - #x_starting.append(x_starting[0]) - #x_ending.append(x_ending[0]) - - if len(new_main_sep_y)>0: + x_ending.append(lj + 1) + # y_lines_by_order.append(int(splitter_y_new[i])) + # x_start_by_order.append(0) + + # y_type_2.append(int(splitter_y_new[i])) + # x_starting.append(x_starting[0]) + # 
x_ending.append(x_ending[0]) + + if len(new_main_sep_y) > 0: y_type_2.append(int(splitter_y_new[i])) x_starting.append(0) - x_ending.append(len(peaks_neg_tot)-1) + x_ending.append(len(peaks_neg_tot) - 1) else: y_type_2.append(int(splitter_y_new[i])) x_starting.append(x_starting[0]) x_ending.append(x_ending[0]) - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) + + y_type_2 = np.array(y_type_2) + x_starting = np.array(x_starting) + x_ending = np.array(x_ending) else: - all_columns=np.array(range(len(peaks_neg_tot)-1)) - columns_not_covered=list( set(all_columns) ) - - - y_type_2=list(y_type_2) - x_starting=list(x_starting) - x_ending=list(x_ending) - + all_columns = np.array(range(len(peaks_neg_tot) - 1)) + columns_not_covered = list(set(all_columns)) + + y_type_2 = list(y_type_2) + x_starting = list(x_starting) + x_ending = list(x_ending) + for lj in columns_not_covered: y_type_2.append(int(splitter_y_new[i])) x_starting.append(lj) - x_ending.append(lj+1) - ##y_lines_by_order.append(int(splitter_y_new[i])) - ##x_start_by_order.append(0) - - - - y_type_2=np.array(y_type_2) - x_starting=np.array(x_starting) - x_ending=np.array(x_ending) - - ind_args=np.array(range(len(y_type_2))) - #ind_args=np.array(ind_args) - for column in range(len(peaks_neg_tot)-1): - #print(column,'column') - ind_args_in_col=ind_args[x_starting==column] - ind_args_in_col=np.array(ind_args_in_col) - #print(len(y_type_2)) - y_column=y_type_2[ind_args_in_col] - x_start_column=x_starting[ind_args_in_col] - x_end_column=x_ending[ind_args_in_col] - - ind_args_col_sorted=np.argsort(y_column) - y_col_sort=y_column[ind_args_col_sorted] - x_start_column_sort=x_start_column[ind_args_col_sorted] - x_end_column_sort=x_end_column[ind_args_col_sorted] - #print('babali4') + x_ending.append(lj + 1) + # y_lines_by_order.append(int(splitter_y_new[i])) + # x_start_by_order.append(0) + + y_type_2 = np.array(y_type_2) + x_starting = np.array(x_starting) + x_ending = np.array(x_ending) + + ind_args = np.array(range(len(y_type_2))) + # ind_args=np.array(ind_args) + for column in range(len(peaks_neg_tot) - 1): + # print(column,'column') + ind_args_in_col = ind_args[x_starting == column] + ind_args_in_col = np.array(ind_args_in_col) + # print(len(y_type_2)) + y_column = y_type_2[ind_args_in_col] + x_start_column = x_starting[ind_args_in_col] + x_end_column = x_ending[ind_args_in_col] + + ind_args_col_sorted = np.argsort(y_column) + y_col_sort = y_column[ind_args_col_sorted] + x_start_column_sort = x_start_column[ind_args_col_sorted] + x_end_column_sort = x_end_column[ind_args_col_sorted] + # print('babali4') for ii in range(len(y_col_sort)): - #print('babali5') + # print('babali5') y_lines_by_order.append(y_col_sort[ii]) x_start_by_order.append(x_start_column_sort[ii]) - x_end_by_order.append(x_end_column_sort[ii]-1) - - + x_end_by_order.append(x_end_column_sort[ii] - 1) + for il in range(len(y_lines_by_order)): - - - y_copy=list( np.copy(y_lines_by_order) ) - x_start_copy=list( np.copy(x_start_by_order) ) - x_end_copy=list ( np.copy(x_end_by_order) ) - - #print(y_copy,'y_copy') - y_itself=y_copy.pop(il) - x_start_itself=x_start_copy.pop(il) - x_end_itself=x_end_copy.pop(il) - - #print(y_copy,'y_copy2') - - for column in range(x_start_itself,x_end_itself+1): - #print(column,'cols') - y_in_cols=[] + + y_copy = list(np.copy(y_lines_by_order)) + x_start_copy = list(np.copy(x_start_by_order)) + x_end_copy = list(np.copy(x_end_by_order)) + + # print(y_copy,'y_copy') + y_itself = y_copy.pop(il) + 
x_start_itself = x_start_copy.pop(il) + x_end_itself = x_end_copy.pop(il) + + # print(y_copy,'y_copy2') + + for column in range(x_start_itself, x_end_itself + 1): + # print(column,'cols') + y_in_cols = [] for yic in range(len(y_copy)): - #print('burda') - if y_copy[yic]>y_itself and column>=x_start_copy[yic] and column<=x_end_copy[yic]: + # print('burda') + if y_copy[yic] > y_itself and column >= x_start_copy[yic] and column <= x_end_copy[yic]: y_in_cols.append(y_copy[yic]) - #print('burda2') - #print(y_in_cols,'y_in_cols') - if len(y_in_cols)>0: - y_down=np.min(y_in_cols) + # print('burda2') + # print(y_in_cols,'y_in_cols') + if len(y_in_cols) > 0: + y_down = np.min(y_in_cols) else: - y_down=[int(splitter_y_new[i+1])][0] - #print(y_itself,'y_itself') - boxes.append([peaks_neg_tot[column],peaks_neg_tot[column+1],y_itself,y_down]) + y_down = [int(splitter_y_new[i + 1])][0] + # print(y_itself,'y_itself') + boxes.append([peaks_neg_tot[column], peaks_neg_tot[column + 1], y_itself, y_down]) + # else: + # boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]]) - - #else: - #boxes.append([ 0, regions_without_separators[:,:].shape[1] ,splitter_y_new[i],splitter_y_new[i+1]]) - - if right2left_readingorder: + if right2left_readingorder: peaks_neg_tot_tables_new = [] - if len(peaks_neg_tot_tables)>=1: + if len(peaks_neg_tot_tables) >= 1: for peaks_tab_ind in peaks_neg_tot_tables: peaks_neg_tot_tables_ind = regions_without_separators.shape[1] - np.array(peaks_tab_ind) peaks_neg_tot_tables_ind = list(peaks_neg_tot_tables_ind[::-1]) peaks_neg_tot_tables_new.append(peaks_neg_tot_tables_ind) - - + for i in range(len(boxes)): x_start_new = regions_without_separators.shape[1] - boxes[i][1] x_end_new = regions_without_separators.shape[1] - boxes[i][0] From 757987314d4b9b43542ad31bd6c866713c1c5935 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Tue, 5 Mar 2024 23:01:00 +0100 Subject: [PATCH 14/34] Update README.md --- README.md | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d9b92c8..7941658 100644 --- a/README.md +++ b/README.md @@ -17,13 +17,14 @@ * Detection of reading order * Output in [PAGE-XML](https://github.com/PRImA-Research-Lab/PAGE-XML) * [OCR-D](https://github.com/qurator-spk/eynollah#use-as-ocr-d-processor) interface +* [Examples](https://github.com/qurator-spk/eynollah/wiki#examples) ## Installation -Python versions `3.8-3.11` with Tensorflow versions >=`2.12` on Linux are currently supported. While we can not provide support for Windows or MacOS, Windows users may be able to install and run the tool through Linux in [WSL](https://learn.microsoft.com/en-us/windows/wsl/). +Python versions `3.8-3.11` with Tensorflow versions >=`2.12` on Linux are currently supported. For (limited) GPU support the CUDA toolkit needs to be installed. -You can either install from PyPI via +You can either install from PyPI ``` pip install eynollah @@ -39,9 +40,14 @@ cd eynollah; pip install -e . Alternatively, run `make install` or `make install-dev` for editable installation. ## Models -Pre-trained models can be downloaded from [qurator-data.de](https://qurator-data.de/eynollah/). In case you want to train your own model with Eynollah, have a look at [`train`](https://github.com/qurator-spk/eynollah/tree/main/eynollah/eynollah/train). 
+Pre-trained models can be downloaded either from [qurator-data.de](https://qurator-data.de/eynollah/) or [huggingface](https://huggingface.co/SBB). -## Usage +## Train +🚧 **Work in progress** + +In case you want to train your own model, have a look at [`train`](https://github.com/qurator-spk/eynollah/tree/main/eynollah/eynollah/train). + +## Use The command-line interface can be called like this: ```sh @@ -76,7 +82,6 @@ If no option is set, the tool will perform layout detection of main regions (bac The tool produces better quality output when RGB images are used as input than greyscale or binarized images. #### Use as OCR-D processor - Eynollah ships with a CLI interface to be used as [OCR-D](https://ocr-d.de) processor. In this case, the source image file group with (preferably) RGB images should be used as input like this: @@ -93,8 +98,11 @@ ocrd-eynollah-segment -I OCR-D-IMG-BIN -O SEG-LINE -P models uses the original (RGB) image despite any binarization that may have occured in previous OCR-D processing steps +#### Additional documentation +Please check the [wiki](https://github.com/qurator-spk/eynollah/wiki). + ## How to cite -If you find this tool useful in your work, please consider citing our paper: +If you find this useful in your work, please consider citing our paper: ```bibtex @inproceedings{rezanezhad2023eynollah, From dd60cd6c9205078965dafa3c650033b52b7a4a28 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Mon, 11 Mar 2024 14:14:38 +0100 Subject: [PATCH 15/34] improve CLI documentation (HT @michalbubula) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7941658..6c2380f 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ The command-line interface can be called like this: ```sh eynollah \ - -i \ + -i | -si \ -o \ -m \ [OPTIONS] From c9c9047e20f9160126810a56e37c56a89dc77d4b Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Mon, 11 Mar 2024 14:16:31 +0100 Subject: [PATCH 16/34] fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c2380f..c758d01 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ The command-line interface can be called like this: ```sh eynollah \ - -i | -si \ + -i | -di \ -o \ -m \ [OPTIONS] From 896e50bf06e610e3089358a357e761e7ca4c8106 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Mon, 11 Mar 2024 14:37:12 +0100 Subject: [PATCH 17/34] pin TF version 2.13 in requirements.txt (TF 2.14 or greater pulls incompatible Keras 3) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 530dac2..8e582e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ ocrd >= 2.23.3 numpy <1.24.0 scikit-learn >= 0.23.2 -tensorflow >=2.12.0 +tensorflow == 2.13.0 # TF 2.14 or greater pulls incompatible Keras 3 imutils >= 0.5.3 matplotlib setuptools >= 50 From 482511adc0a853b3877e57f05e55ae37384ec271 Mon Sep 17 00:00:00 2001 From: Clemens Neudecker <952378+cneud@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:16:47 +0100 Subject: [PATCH 18/34] clean up --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c758d01..68f819f 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ * [Examples](https://github.com/qurator-spk/eynollah/wiki#examples) ## Installation 
-Python versions `3.8-3.11` with Tensorflow versions >=`2.12` on Linux are currently supported. +Python versions `3.8-3.11` with Tensorflow version `2.13` on Linux are currently supported. For (limited) GPU support the CUDA toolkit needs to be installed. @@ -52,9 +52,9 @@ The command-line interface can be called like this: ```sh eynollah \ - -i | -di \ + -i | -di \ -o \ - -m \ + -m \ [OPTIONS] ``` @@ -71,7 +71,6 @@ The following options can be used to further configure the processing: | `-ib` | apply binarization (the resulting image is saved to the output directory) | | `-ep` | enable plotting (MUST always be used with `-sl`, `-sd`, `-sa`, `-si` or `-ae`) | | `-ho` | ignore headers for reading order dectection | -| `-di ` | process all images in a directory in batch mode | | `-si ` | save image regions detected to this directory | | `-sd ` | save deskewed image to this directory | | `-sl ` | save layout prediction as plot to this directory | From 263a61c5a323b73f0727b6d3a8974c470f8c81dd Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 15 Mar 2024 21:22:09 +0100 Subject: [PATCH 19/34] restrict tf2 to less than 2.16 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8e582e0..a485d89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ ocrd >= 2.23.3 numpy <1.24.0 scikit-learn >= 0.23.2 -tensorflow == 2.13.0 # TF 2.14 or greater pulls incompatible Keras 3 +tensorflow <2.16.0 # TF 2.16 or greater pulls incompatible Keras 3 imutils >= 0.5.3 matplotlib setuptools >= 50 From 9417667a4b000fbbcd8f770becca337d687e7f3f Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 15 Mar 2024 22:47:22 +0100 Subject: [PATCH 20/34] document compatible TF2 versions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 68f819f..ba90039 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ * [Examples](https://github.com/qurator-spk/eynollah/wiki#examples) ## Installation -Python versions `3.8-3.11` with Tensorflow version `2.13` on Linux are currently supported. +Python versions `3.8-3.11` with Tensorflow versions `<2.16` on Linux are currently supported. For (limited) GPU support the CUDA toolkit needs to be installed. 
From 3270f49256d19077386b04fa4692600c23e8fce3 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 15 Mar 2024 23:00:20 +0100 Subject: [PATCH 21/34] add DOI badge --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ba90039..5c95d41 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![CircleCI Build Status](https://circleci.com/gh/qurator-spk/eynollah.svg?style=shield)](https://circleci.com/gh/qurator-spk/eynollah) [![GH Actions Test](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml/badge.svg)](https://github.com/qurator-spk/eynollah/actions/workflows/test-eynollah.yml) [![License: ASL](https://img.shields.io/github/license/qurator-spk/eynollah)](https://opensource.org/license/apache-2-0/) +[![DOI](https://img.shields.io/badge/DOI-10.1145%2F3604951.3605513-red)](https://doi.org/10.1145/3604951.3605513) ![](https://user-images.githubusercontent.com/952378/102350683-8a74db80-3fa5-11eb-8c7e-f743f7d6eae2.jpg) From b8144f008b25f487bd3139876b512587fd238489 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 15 Mar 2024 23:41:54 +0100 Subject: [PATCH 22/34] convert `img_name` to string --- eynollah/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eynollah/eynollah/eynollah.py b/eynollah/eynollah/eynollah.py index f3fda56..a68e26f 100644 --- a/eynollah/eynollah/eynollah.py +++ b/eynollah/eynollah/eynollah.py @@ -3037,7 +3037,7 @@ class Eynollah: t0_tot = time.time() if not self.dir_in: - self.ls_imgs = [1] + self.ls_imgs = str([1]) for img_name in self.ls_imgs: t0 = time.time() From bfde86d2479a7977d35f142fe68888c9be1db5b0 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Fri, 15 Mar 2024 23:54:52 +0100 Subject: [PATCH 23/34] update `ocrd` version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a485d89..263f5b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # ocrd includes opencv, numpy, shapely, click -ocrd >= 2.23.3 +ocrd >= 2.63.3 numpy <1.24.0 scikit-learn >= 0.23.2 tensorflow <2.16.0 # TF 2.16 or greater pulls incompatible Keras 3 From cd128f61aad9eddebf7e4425c356765581add479 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 20:32:25 +0100 Subject: [PATCH 24/34] use tf1 compatibility for keras backend --- eynollah/eynollah/eynollah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eynollah/eynollah/eynollah.py b/eynollah/eynollah/eynollah.py index a68e26f..300fc53 100644 --- a/eynollah/eynollah/eynollah.py +++ b/eynollah/eynollah/eynollah.py @@ -31,7 +31,7 @@ warnings.filterwarnings("ignore") from scipy.signal import find_peaks import matplotlib.pyplot as plt from scipy.ndimage import gaussian_filter1d -from tensorflow.python.keras.backend import set_session +from tensorflow.compat.v1.keras.backend import set_session from tensorflow.keras import layers from .utils.contour import ( From dc3c676ff416fa555d19c9683fd0736c1d965c2d Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 20:33:14 +0100 Subject: [PATCH 25/34] pin tf2 version to 2.12.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 263f5b0..7bcc941 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ ocrd >= 2.63.3 
numpy <1.24.0 scikit-learn >= 0.23.2 -tensorflow <2.16.0 # TF 2.16 or greater pulls incompatible Keras 3 +tensorflow == 2.12.1 imutils >= 0.5.3 matplotlib setuptools >= 50 From a269cca8c860d3f83d131eabcfabd77572684dda Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 20:39:33 +0100 Subject: [PATCH 26/34] also test Python 3.12 --- .github/workflows/test-eynollah.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index 30c9729..d132400 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 From 5fcfe61afc9a91f2a1202b961afddef13081f8e2 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 20:40:51 +0100 Subject: [PATCH 27/34] drop CircleCI --- .circleci/config.yml | 51 -------------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index d2b7057..0000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,51 +0,0 @@ -version: 2 - -jobs: - - build-python37: - machine: - - image: ubuntu-2004:2023.02.1 - - steps: - - checkout - - restore_cache: - keys: - - model-cache - - run: make models - - save_cache: - key: model-cache - paths: - models_eynollah.tar.gz - models_eynollah - - run: - name: "Set Python Version" - command: pyenv install -s 3.7.16 && pyenv global 3.7.16 - - run: make install - - run: make smoke-test - - build-python38: - machine: - - image: ubuntu-2004:2023.02.1 - steps: - - checkout - - restore_cache: - keys: - - model-cache - - run: make models - - save_cache: - key: model-cache - paths: - models_eynollah.tar.gz - models_eynollah - - run: - name: "Set Python Version" - command: pyenv install -s 3.8.16 && pyenv global 3.8.16 - - run: make install - - run: make smoke-test - -workflows: - version: 2 - build: - jobs: - # - build-python37 - - build-python38 From 9d24150031bf021ee2cbb91ae388d62b681b1969 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 22:20:19 +0100 Subject: [PATCH 28/34] use tf1.compat mode for keras --- eynollah/eynollah/train/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eynollah/eynollah/train/train.py b/eynollah/eynollah/train/train.py index 0881182..ec6900c 100644 --- a/eynollah/eynollah/train/train.py +++ b/eynollah/eynollah/train/train.py @@ -1,7 +1,7 @@ import os import sys import tensorflow as tf -from keras.backend.tensorflow_backend import set_session +from tensorflow.compat.v1.keras.backend import set_session import keras, warnings from keras.optimizers import * from sacred import Experiment From a5c885e151539e0def308242acd73311898f91b0 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 22:34:58 +0100 Subject: [PATCH 29/34] create draft pyproject.toml --- pyproject.toml.draft | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 pyproject.toml.draft diff --git a/pyproject.toml.draft b/pyproject.toml.draft new file mode 100644 index 0000000..5042563 --- /dev/null +++ b/pyproject.toml.draft @@ -0,0 +1,40 @@ +[build-system] +requires = 
["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "eynollah" +version = "0.3.0" +authors = [ + {name = "Vahid Rezanezhad"} +] +description = "Document Layout Analysis" +readme = "README.md" +requires-python = ">=3.8" +keywords = ["document layout analysis", "semantic segmentation"] +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Scientific/Engineering :: Image Processing", +] +dependencies = [ + "ocrd >= 2.63.3", + "numpy <1.24.0", + "scikit-learn >= 0.23.2", + "tensorflow == 2.12.1", + "imutils >= 0.5.3", + "matplotlib", + "setuptools >= 61", +] + +[project.scripts] +eynollah = "eynollah.eynollah.cli:main" +ocrd-eynollah-segment = "eynollah.eynollah.ocrd_cli:main" + +[project.urls] +Homepage = "https://github.com/qurator-spk/eynollah" +Repository = "https://github.com/qurator-spk/eynollah.git" From 9ad7b4f52006f3a7e2d159b8ccf1e01e68826384 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 22:59:09 +0100 Subject: [PATCH 30/34] update GitHub actions --- .github/workflows/test-eynollah.yml | 6 +++--- pyproject.toml.draft | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index d132400..fac333b 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -14,8 +14,8 @@ jobs: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - - uses: actions/checkout@v2 - - uses: actions/cache@v2 + - uses: actions/checkout@v4 + - uses: actions/cache@v4 id: model_cache with: path: models_eynollah @@ -24,7 +24,7 @@ jobs: if: steps.model_cache.outputs.cache-hit != 'true' run: make models - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/pyproject.toml.draft b/pyproject.toml.draft index 5042563..9a22170 100644 --- a/pyproject.toml.draft +++ b/pyproject.toml.draft @@ -31,6 +31,8 @@ dependencies = [ "setuptools >= 61", ] +# TODO: test dependencies + [project.scripts] eynollah = "eynollah.eynollah.cli:main" ocrd-eynollah-segment = "eynollah.eynollah.ocrd_cli:main" From 8f6dd0649ed2b7beaea5c11218100c18240b21f5 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 23:19:21 +0100 Subject: [PATCH 31/34] update `scikit-learn` to version supporting Python 3.12 --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7bcc941..f39359c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # ocrd includes opencv, numpy, shapely, click ocrd >= 2.63.3 numpy <1.24.0 -scikit-learn >= 0.23.2 +scikit-learn >= 1.4.1 tensorflow == 2.12.1 -imutils >= 0.5.3 +imutils >= 0.5.4 matplotlib setuptools >= 50 From 6524b0b8507bc678a0b2e643cccb821f51a31faa Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 23:21:29 +0100 Subject: [PATCH 32/34] Update pyproject.toml.draft --- pyproject.toml.draft | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml.draft b/pyproject.toml.draft index 9a22170..aaea4cc 100644 --- a/pyproject.toml.draft 
+++ b/pyproject.toml.draft @@ -24,9 +24,9 @@ classifiers = [ dependencies = [ "ocrd >= 2.63.3", "numpy <1.24.0", - "scikit-learn >= 0.23.2", + "scikit-learn >= 1.4.1", "tensorflow == 2.12.1", - "imutils >= 0.5.3", + "imutils >= 0.5.4", "matplotlib", "setuptools >= 61", ] From c3d9315b9ebec6a8ab13c867a3417bbbc3802176 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Tue, 19 Mar 2024 23:33:57 +0100 Subject: [PATCH 33/34] pin `scikit-learn` version supporting Python 3.8-3.12 --- pyproject.toml.draft | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml.draft b/pyproject.toml.draft index aaea4cc..32c927d 100644 --- a/pyproject.toml.draft +++ b/pyproject.toml.draft @@ -24,7 +24,7 @@ classifiers = [ dependencies = [ "ocrd >= 2.63.3", "numpy <1.24.0", - "scikit-learn >= 1.4.1", + "scikit-learn >= 1.3.2", "tensorflow == 2.12.1", "imutils >= 0.5.4", "matplotlib", diff --git a/requirements.txt b/requirements.txt index f39359c..667a630 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # ocrd includes opencv, numpy, shapely, click ocrd >= 2.63.3 numpy <1.24.0 -scikit-learn >= 1.4.1 +scikit-learn == 1.3.2 # only version supporting Python 3.8-3.12 tensorflow == 2.12.1 imutils >= 0.5.4 matplotlib From 1469dd505f389f7f5c2d8c3d0258fbddc67a5628 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Wed, 20 Mar 2024 00:37:12 +0100 Subject: [PATCH 34/34] remove Python 3.12 from tests again for now --- .github/workflows/test-eynollah.yml | 2 +- pyproject.toml.draft | 8 ++++---- requirements.txt | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-eynollah.yml b/.github/workflows/test-eynollah.yml index fac333b..5a1acf4 100644 --- a/.github/workflows/test-eynollah.yml +++ b/.github/workflows/test-eynollah.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml.draft b/pyproject.toml.draft index 32c927d..fd0b9df 100644 --- a/pyproject.toml.draft +++ b/pyproject.toml.draft @@ -23,11 +23,11 @@ classifiers = [ ] dependencies = [ "ocrd >= 2.63.3", - "numpy <1.24.0", - "scikit-learn >= 1.3.2", - "tensorflow == 2.12.1", + "numpy <= 1.24.4", + "scikit-learn <= 1.3.2", + "tensorflow <= 2.13.1", "imutils >= 0.5.4", - "matplotlib", + "matplotlib <= 3.7.5", "setuptools >= 61", ] diff --git a/requirements.txt b/requirements.txt index 667a630..53a8c3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # ocrd includes opencv, numpy, shapely, click ocrd >= 2.63.3 -numpy <1.24.0 -scikit-learn == 1.3.2 # only version supporting Python 3.8-3.12 -tensorflow == 2.12.1 +numpy <= 1.24.4 +scikit-learn <= 1.3.2 +tensorflow <= 2.13.1 imutils >= 0.5.4 -matplotlib -setuptools >= 50 +matplotlib <= 3.7.5 +setuptools >= 61
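
Two recurring themes in the patches above may benefit from illustrative sketches; both examples below are editor-added illustrations under stated assumptions, not part of the commit series.

Patches 24 and 28 switch the Keras backend import to `tensorflow.compat.v1.keras.backend.set_session`, and patches 17, 19, 25, 31 and 34 progressively settle the TensorFlow pin below 2.16, whose Keras 3 the commit messages note is incompatible with this path. A minimal sketch of how `set_session` is typically wired up before model loading — the `ConfigProto` options here are assumptions for illustration, not values taken from the patches:

```python
# Illustrative use of the tf.compat.v1 set_session import adopted in
# patches 24 and 28. The GPU option below is an assumed typical setting,
# not a value from the patch series.
import tensorflow as tf
from tensorflow.compat.v1.keras.backend import set_session

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True  # allocate GPU memory on demand
set_session(tf.compat.v1.Session(config=config))
```

The large hunk at the top of this range reformats the box-building loops in `return_boxes_of_images_by_order_of_reading_new` without changing their behavior. Distilled to their core (an assumed simplification for readability, not the full function): for every separator line and every column it spans, the reading-order box extends down to the next separator that also covers that column, or to the next horizontal splitter if none does.

```python
# Distilled sketch of the box-building loop reformatted in the first hunk.
# Inputs are parallel lists of separator lines (a y position plus the
# inclusive column range each one spans); peaks_neg_tot holds the column
# x-boundaries, and y_bottom plays the role of splitter_y_new[i + 1].
def boxes_from_separators(y_lines, x_start, x_end, peaks_neg_tot, y_bottom):
    boxes = []
    for il, y_itself in enumerate(y_lines):
        for column in range(x_start[il], x_end[il] + 1):
            # separators strictly below this one that also cover the column
            y_in_cols = [y_lines[j] for j in range(len(y_lines))
                         if j != il
                         and y_lines[j] > y_itself
                         and x_start[j] <= column <= x_end[j]]
            y_down = min(y_in_cols) if y_in_cols else y_bottom
            boxes.append([peaks_neg_tot[column], peaks_neg_tot[column + 1],
                          y_itself, y_down])
    return boxes
```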