Mirror of https://github.com/qurator-spk/eynollah.git (synced 2025-10-08 07:29:55 +02:00)

Commit db3f8c6db0: Merge f60e0543ab into 8a9b4f8f55
12 changed files with 120 additions and 58 deletions
@@ -13,7 +13,7 @@ The following three tasks can all be accomplished using the code in the
 * train a model
 * inference with the trained model
 
-## Training , evaluation and output
+## Training, evaluation and output
 
 The train and evaluation folders should contain subfolders of `images` and `labels`.
 
@@ -22,11 +22,13 @@ The output folder should be an empty folder where the output model will be writt
 ## Generate training dataset
 
 The script `generate_gt_for_training.py` is used for generating training datasets. As the results of the following
-command demonstrates, the dataset generator provides three different commands:
+command demonstrates, the dataset generator provides several subcommands:
 
-`python generate_gt_for_training.py --help`
+```sh
+eynollah-training generate-gt --help
+```
 
-These three commands are:
+The three most important subcommands are:
 
 * image-enhancement
 * machine-based-reading-order
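Since the diff only shows the renamed entry point, here is one way to sanity-check the regrouped CLI in-process, using click's test runner; a minimal sketch assuming this PR's `eynollah.training.cli` layout:

```python
# Minimal sketch: exercise the new click group programmatically
# (assumes the eynollah.training.cli module added in this PR is importable).
from click.testing import CliRunner

from eynollah.training.cli import main

runner = CliRunner()
result = runner.invoke(main, ["generate-gt", "--help"])
print(result.output)  # help text listing the generate-gt subcommands
```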
@@ -38,7 +40,7 @@ Generating a training dataset for image enhancement is quite straightforward. Al
 high-resolution images. The training dataset can then be generated using the following command:
 
 ```sh
-python generate_gt_for_training.py image-enhancement \
+eynollah-training generate-gt image-enhancement \
 -dis "dir of high resolution images" \
 -dois "dir where degraded images will be written" \
 -dols "dir where the corresponding high resolution image will be written as label" \
@@ -69,7 +71,7 @@ to filter out regions smaller than this minimum size. This minimum size is defin
 to the image area, with a default value of zero. To run the dataset generator, use the following command:
 
 ```shell
-python generate_gt_for_training.py machine-based-reading-order \
+eynollah-training generate-gt machine-based-reading-order \
 -dx "dir of GT xml files" \
 -domi "dir where output images will be written" \
 -docl "dir where the labels will be written" \
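The minimum-size filter described in this hunk reduces to a single ratio comparison; a hedged sketch of the idea (the function and parameter names are illustrative, not the tool's actual API):

```python
# Sketch of the area-ratio filter described above (names are illustrative).
def keep_region(region_area: float, image_area: float, min_area_ratio: float = 0.0) -> bool:
    """Keep a region only if its area is at least min_area_ratio of the image area."""
    return (region_area / image_area) >= min_area_ratio
```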
@@ -144,7 +146,7 @@ region" are also present in the label. However, other regions like "noise region
 included in the label PNG file, even if they have information in the page XML files, as we chose not to include them.
 
 ```sh
-python generate_gt_for_training.py pagexml2label \
+eynollah-training generate-gt pagexml2label \
 -dx "dir of GT xml files" \
 -do "dir where output label png files will be written" \
 -cfg "custom config json file" \
@@ -198,7 +200,7 @@ provided to ensure that they are cropped in sync with the labels. This ensures t
 required for training are obtained. The command should resemble the following:
 
 ```sh
-python generate_gt_for_training.py pagexml2label \
+eynollah-training generate-gt pagexml2label \
 -dx "dir of GT xml files" \
 -do "dir where output label png files will be written" \
 -cfg "custom config json file" \
@@ -261,7 +263,7 @@ And the "dir_eval" the same structure as train directory:
 The classification model can be trained using the following command line:
 
 ```sh
-python train.py with config_classification.json
+eynollah-training train with config_classification.json
 ```
 
 As evident in the example JSON file above, for classification, we utilize a "f1_threshold_classification" parameter.
@@ -395,7 +397,9 @@ And the "dir_eval" the same structure as train directory:
 After configuring the JSON file for segmentation or enhancement, training can be initiated by running the following
 command, similar to the process for classification and reading order:
 
-`python train.py with config_classification.json`
+```sh
+eynollah-training train with config_classification.json
+```
 
 #### Binarization
 
@@ -679,7 +683,7 @@ For conducting inference with a trained model, you simply need to execute the fo
 directory of the model and the image on which to perform inference:
 
 ```sh
-python inference.py -m "model dir" -i "image"
+eynollah-training inference -m "model dir" -i "image"
 ```
 
 This will straightforwardly return the class of the image.
@@ -691,7 +695,7 @@ without the reading order. We simply need to provide the model directory, the XM
 new XML file with the added reading order will be written to the output directory with the same name. We need to run:
 
 ```sh
-python inference.py \
+eynollah-training inference \
 -m "model dir" \
 -xml "page xml file" \
 -o "output dir to write new xml with reading order"
@@ -702,7 +706,7 @@ python inference.py \
 For conducting inference with a trained model for segmentation and enhancement you need to run the following command line:
 
 ```sh
-python inference.py \
+eynollah-training inference \
 -m "model dir" \
 -i "image" \
 -p \
@@ -31,6 +31,7 @@ classifiers = [
 
 [project.scripts]
 eynollah = "eynollah.cli:main"
+eynollah-training = "eynollah.training.cli:main"
 ocrd-eynollah-segment = "eynollah.ocrd_cli:main"
 ocrd-sbb-binarize = "eynollah.ocrd_cli_binarization:main"
 
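For readers unfamiliar with `[project.scripts]`: on installation, pip generates a small wrapper executable that imports and calls the referenced function. Roughly equivalent to this illustrative sketch (not the generated file verbatim):

```python
# Rough equivalent of the console-script wrapper pip generates for
# eynollah-training = "eynollah.training.cli:main" (illustrative sketch).
import sys

from eynollah.training.cli import main

if __name__ == "__main__":
    sys.exit(main())
```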
@@ -1,20 +1,15 @@
 import os
-import sys
 import click
 import tensorflow as tf
-import warnings
-from tensorflow.keras.optimizers import *
-from sacred import Experiment
-from models import *
-from utils import *
-from metrics import *
 
+from .models import resnet50_unet
 
 
 def configuration():
     gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
     session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
 
 
-if __name__ == '__main__':
+@click.command()
+def build_model_load_pretrained_weights_and_save():
     n_classes = 2
     input_height = 224
     input_width = 448

src/eynollah/training/cli.py (new file, 26 lines)
@@ -0,0 +1,26 @@
+import os
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+import click
+import sys
+
+from .build_model_load_pretrained_weights_and_save import build_model_load_pretrained_weights_and_save
+from .generate_gt_for_training import main as generate_gt_cli
+from .inference import main as inference_cli
+from .train import ex
+
+@click.command(context_settings=dict(
+    ignore_unknown_options=True,
+))
+@click.argument('SACRED_ARGS', nargs=-1, type=click.UNPROCESSED)
+def train_cli(sacred_args):
+    ex.run_commandline([sys.argv[0]] + list(sacred_args))
+
+@click.group('training')
+def main():
+    pass
+
+main.add_command(build_model_load_pretrained_weights_and_save)
+main.add_command(generate_gt_cli, 'generate-gt')
+main.add_command(inference_cli, 'inference')
+main.add_command(train_cli, 'train')
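The `train` subcommand above is a deliberate passthrough: `ignore_unknown_options` together with the `UNPROCESSED` variadic argument makes click forward everything after `train` untouched to sacred's own command-line parser, so sacred idioms such as `with config_params.json` keep working. An illustration of the forwarding (the config name is a placeholder):

```python
# Illustration of what train_cli forwards: for
#   eynollah-training train with config_params.json
# click captures ("with", "config_params.json") as SACRED_ARGS and sacred
# receives the same argv it would have seen from the old train.py.
import sys

sacred_args = ("with", "config_params.json")  # placeholder config name
argv_for_sacred = [sys.argv[0]] + list(sacred_args)
# train_cli then calls: ex.run_commandline(argv_for_sacred)
print(argv_for_sacred)
```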
@@ -1,9 +1,28 @@
 import click
 import json
-from gt_gen_utils import *
 import os
 from tqdm import tqdm
 from pathlib import Path
 from PIL import Image, ImageDraw, ImageFont
 import cv2
 import numpy as np
+
+from eynollah.training.gt_gen_utils import (
+    filter_contours_area_of_image,
+    find_format_of_given_filename_in_dir,
+    find_new_features_of_contours,
+    fit_text_single_line,
+    get_content_of_dir,
+    get_images_of_ground_truth,
+    get_layout_contours_for_visualization,
+    get_textline_contours_and_ocr_text,
+    get_textline_contours_for_visualization,
+    overlay_layout_on_image,
+    read_xml,
+    resize_image,
+    visualize_image_from_contours,
+    visualize_image_from_contours_layout
+)
+
+@click.group()
+def main():
@@ -562,6 +581,3 @@ def visualize_ocr_text(xml_file, dir_xml, dir_out):
             # Draw the text
             draw.text((text_x, text_y), ocr_texts[index], fill="black", font=font)
         image_text.save(os.path.join(dir_out, f_name+'.png'))
-
-if __name__ == "__main__":
-    main()
@@ -1,5 +1,3 @@
-import click
-import sys
 import os
 import numpy as np
 import warnings
@@ -8,8 +6,7 @@ from tqdm import tqdm
 import cv2
 from shapely import geometry
 from pathlib import Path
-import matplotlib.pyplot as plt
-from PIL import Image, ImageDraw, ImageFont
+from PIL import ImageFont
 
 
 KERNEL = np.ones((5, 5), np.uint8)
@@ -1,23 +1,29 @@
 import sys
 import os
-import numpy as np
 import warnings
 import json
+
+import numpy as np
 import cv2
 import seaborn as sns
 from tensorflow.keras.models import load_model
 import tensorflow as tf
 from tensorflow.keras import backend as K
 from tensorflow.keras import layers
 import tensorflow.keras.losses
 from tensorflow.keras.layers import *
-from models import *
-from gt_gen_utils import *
 import click
-import json
 from tensorflow.python.keras import backend as tensorflow_backend
 import xml.etree.ElementTree as ET
-import matplotlib.pyplot as plt
 
+from .gt_gen_utils import (
+    filter_contours_area_of_image,
+    find_new_features_of_contours,
+    read_xml,
+    resize_image,
+    update_list_and_return_first_with_length_bigger_than_one
+)
+from .models import (
+    PatchEncoder,
+    Patches
+)
 
 with warnings.catch_warnings():
     warnings.simplefilter("ignore")
@@ -55,11 +61,9 @@ class sbb_predict:
         seg=seg[:,:,0]
 
         seg_img=np.zeros((np.shape(seg)[0],np.shape(seg)[1],3)).astype(np.uint8)
-        colors=sns.color_palette("hls", self.n_classes)
 
         for c in ann_u:
             c=int(c)
-            segl=(seg==c)
             seg_img[:,:,0][seg==c]=c
             seg_img[:,:,1][seg==c]=c
             seg_img[:,:,2][seg==c]=c
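A side note on the per-class loop kept in this hunk: since it writes the same class index into all three channels, it collapses to a single vectorized assignment. A sketch with dummy data (not part of the PR):

```python
# Vectorized equivalent of the per-class loop in sbb_predict (sketch).
import numpy as np

seg = np.random.randint(0, 4, (5, 5))  # dummy class map
seg_img = np.repeat(seg[:, :, None], 3, axis=2).astype(np.uint8)
```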
@@ -674,9 +678,3 @@ def main(image, dir_in, model, patches, save, save_layout, ground_truth, xml_fil
     x=sbb_predict(image, dir_in, model, task, config_params_model, patches, save, save_layout, ground_truth, xml_file, out, min_area)
     x.run()
 
-if __name__=="__main__":
-    main()
-
-
-
@@ -1,20 +1,45 @@
 import os
+import sys
+import json
+
+import click
+
+from eynollah.training.metrics import (
+    soft_dice_loss,
+    weighted_categorical_crossentropy
+)
+from eynollah.training.models import (
+    PatchEncoder,
+    Patches,
+    machine_based_reading_order_model,
+    resnet50_classifier,
+    resnet50_unet,
+    vit_resnet50_unet,
+    vit_resnet50_unet_transformer_before_cnn
+)
+from eynollah.training.utils import (
+    data_gen,
+    generate_arrays_from_folder_reading_order,
+    generate_data_from_folder_evaluation,
+    generate_data_from_folder_training,
+    get_one_hot,
+    provide_patches,
+    return_number_of_total_training_data
+)
+
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 import tensorflow as tf
 from tensorflow.compat.v1.keras.backend import set_session
 import warnings
-from tensorflow.keras.optimizers import *
+from tensorflow.keras.optimizers import SGD, Adam
 from sacred import Experiment
-from models import *
-from utils import *
-from metrics import *
 from tensorflow.keras.models import load_model
 from tqdm import tqdm
-import json
 from sklearn.metrics import f1_score
 from tensorflow.keras.callbacks import Callback
 
 import numpy as np
 import cv2
 
 class SaveWeightsAfterSteps(Callback):
     def __init__(self, save_interval, save_path, _config):
         super(SaveWeightsAfterSteps, self).__init__()
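The hunk above cuts off inside `SaveWeightsAfterSteps.__init__`. For orientation, a save-every-N-steps Keras callback of this shape typically looks like the sketch below; everything beyond the `__init__` signature visible in the diff is an assumption, not the PR's exact code:

```python
# Sketch of a save-every-N-steps Keras callback (assumed details are marked).
import os

import tensorflow as tf


class SaveWeightsAfterSteps(tf.keras.callbacks.Callback):
    def __init__(self, save_interval, save_path, _config):
        super(SaveWeightsAfterSteps, self).__init__()
        self.save_interval = save_interval
        self.save_path = save_path
        self._config = _config
        self.step_count = 0  # assumed counter name

    def on_train_batch_end(self, batch, logs=None):
        self.step_count += 1
        if self.step_count % self.save_interval == 0:
            # assumed file naming; the PR's code may differ
            path = os.path.join(self.save_path, f"model_step_{self.step_count}")
            self.model.save(path)
```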
@@ -45,8 +70,8 @@ def configuration():
 
 
 def get_dirs_or_files(input_data):
-    if os.path.isdir(input_data):
     image_input, labels_input = os.path.join(input_data, 'images/'), os.path.join(input_data, 'labels/')
+    if os.path.isdir(input_data):
         # Check if training dir exists
         assert os.path.isdir(image_input), "{} is not a directory".format(image_input)
         assert os.path.isdir(labels_input), "{} is not a directory".format(labels_input)
@@ -121,7 +146,6 @@ def config_params():
     dir_rgb_backgrounds = None
     dir_rgb_foregrounds = None
 
-
 @ex.automain
 def run(_config, n_classes, n_epochs, input_height,
         input_width, weight_decay, weighted_loss,
@@ -423,7 +447,7 @@ def run(_config, n_classes, n_epochs, input_height,
 
         #f1score_tot = [0]
         indexer_start = 0
-        opt = SGD(learning_rate=0.01, momentum=0.9)
+        # opt = SGD(learning_rate=0.01, momentum=0.9)
         opt_adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
         model.compile(loss="binary_crossentropy",
                       optimizer = opt_adam,metrics=['accuracy'])
@@ -1,13 +1,14 @@
 import os
+import math
+import random
+
 import cv2
 import numpy as np
 import seaborn as sns
 from scipy.ndimage.interpolation import map_coordinates
 from scipy.ndimage.filters import gaussian_filter
-import random
 from tqdm import tqdm
 import imutils
-import math
 from tensorflow.keras.utils import to_categorical
 from PIL import Image, ImageEnhance
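utils.py keeps its `map_coordinates` and `gaussian_filter` imports, the standard ingredients of elastic-distortion augmentation. For readers new to the technique, a generic single-channel sketch (parameter values are arbitrary; this is not the PR's implementation):

```python
# Generic elastic-distortion sketch (illustrative; not the PR's exact code).
import numpy as np
from scipy.ndimage.interpolation import map_coordinates
from scipy.ndimage.filters import gaussian_filter


def elastic_transform(image, alpha=34.0, sigma=4.0, seed=0):
    """Warp a single-channel image with a smoothed random displacement field."""
    rng = np.random.RandomState(seed)
    shape = image.shape
    # Smooth per-pixel random offsets for rows (dy) and columns (dx)
    dx = gaussian_filter(rng.rand(*shape) * 2 - 1, sigma) * alpha
    dy = gaussian_filter(rng.rand(*shape) * 2 - 1, sigma) * alpha
    x, y = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]))
    indices = (y + dy).reshape(-1), (x + dx).reshape(-1)
    return map_coordinates(image, indices, order=1).reshape(shape)
```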