# Patch summary (leading commentary; precedes the first `diff --git` header).
#
# src/eynollah/sbb_binarize.py
#   * Adds `from pathlib import Path` in place of a blank line.
#   * In the SbbBinarizer hunk, `image_stem` is now `Path(image_path).stem` instead of
#     `image_path.split('.')[0]`: only the final suffix is dropped, so a name such as
#     "a.b.png" yields "a.b" rather than "a".
#
# src/eynollah/training/gt_gen_utils.py (get_images_of_ground_truth)
#   * Selecting the largest contour and taking its bounding rect is wrapped in try/except;
#     on failure the box falls back to `0, 0, x_len, y_len`.
#   * NOTE(review): the handler is a bare `except:`; presumably it targets the ValueError
#     that np.argmax raises for an empty `cnt_size` (no contours found) - confirm and narrow.
#   * NOTE(review): `x_len` / `y_len` are defined outside this hunk - confirm they are
#     width and height in that order.
#
# src/eynollah/training/models.py (cnn_rnn_ocr_model)
#   * The Conv1D applied to `addition_rnn` switches `data_format` from "channels_last" to
#     "channels_first".
#   * NOTE(review): this changes which axis the layer treats as channels - confirm intended.
#
# src/eynollah/training/utils.py
#   * Adds `from pathlib import Path` in place of a blank line.
#   * scale_padd_image_for_ocr: when `width_new <= 0`, it is reset to `width` before resizing.
#   * data_gen_ocr: `f_name` is now `Path(i).stem`; the old expression is left behind as a
#     trailing comment, and a `print(i, 'i')` is added inside the `for` loop under `while True`.
#   * NOTE(review): the print looks like leftover debugging output - confirm before merging.
#
diff --git a/src/eynollah/sbb_binarize.py b/src/eynollah/sbb_binarize.py index 851ac7d..37ac7c3 100644 --- a/src/eynollah/sbb_binarize.py +++ b/src/eynollah/sbb_binarize.py @@ -19,7 +19,7 @@ from eynollah.model_zoo import EynollahModelZoo tf_disable_interactive_logs() import tensorflow as tf from tensorflow.python.keras import backend as tensorflow_backend - +from pathlib import Path from .utils import is_image_filename def resize_image(img_in, input_height, input_width): @@ -347,7 +347,7 @@ class SbbBinarizer: self.logger.info("Found %d image files to binarize in %s", len(ls_imgs), dir_in) for i, image_path in enumerate(ls_imgs): self.logger.info('Binarizing [%3d/%d] %s', i + 1, len(ls_imgs), image_path) - image_stem = image_path.split('.')[0] + image_stem = Path(image_path).stem image = cv2.imread(os.path.join(dir_in,image_path) ) img_last = 0 model_file, model = self.models diff --git a/src/eynollah/training/gt_gen_utils.py b/src/eynollah/training/gt_gen_utils.py index 62a094a..0f29f9e 100644 --- a/src/eynollah/training/gt_gen_utils.py +++ b/src/eynollah/training/gt_gen_utils.py @@ -734,12 +734,15 @@ def get_images_of_ground_truth(gt_list, dir_in, output_dir, output_type, config_ _, thresh = cv2.threshold(imgray, 0, 255, 0) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) - + cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) - - cnt = contours[np.argmax(cnt_size)] - - x, y, w, h = cv2.boundingRect(cnt) + + try: + cnt = contours[np.argmax(cnt_size)] + x, y, w, h = cv2.boundingRect(cnt) + except: + x, y , w, h = 0, 0, x_len, y_len + bb_xywh = [x, y, w, h] diff --git a/src/eynollah/training/models.py b/src/eynollah/training/models.py index 5528761..d1b0aa2 100644 --- a/src/eynollah/training/models.py +++ b/src/eynollah/training/models.py @@ -843,7 +843,7 @@ def cnn_rnn_ocr_model(image_height=None, image_width=None, n_classes=None, max_s addition_rnn = 
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(image_width, return_sequences=True, dropout=0.25))(addition) - out = tf.keras.layers.Conv1D(max_seq, 1, data_format="channels_last")(addition_rnn) + out = tf.keras.layers.Conv1D(max_seq, 1, data_format="channels_first")(addition_rnn) out = tf.keras.layers.BatchNormalization(name="bn9")(out) out = tf.keras.layers.Activation("relu", name="relu9")(out) #out = tf.keras.layers.Conv1D(n_classes, 1, activation='relu', data_format="channels_last")(out) diff --git a/src/eynollah/training/utils.py b/src/eynollah/training/utils.py index c589957..3b685f1 100644 --- a/src/eynollah/training/utils.py +++ b/src/eynollah/training/utils.py @@ -1,7 +1,7 @@ import os import math import random - +from pathlib import Path import cv2 import numpy as np import seaborn as sns @@ -32,6 +32,9 @@ def scale_padd_image_for_ocr(img, height, width): else: width_new = width + if width_new <= 0: + width_new = width + img_res= resize_image (img, height, width_new) img_fin = np.ones((height, width, 3))*255 @@ -1304,7 +1307,8 @@ def data_gen_ocr(padding_token, n_batch, input_height, input_width, max_len, dir batchcount = 0 while True: for i in ls_files_images: - f_name = i.split('.')[0] + print(i, 'i') + f_name = Path(i).stem#.split('.')[0] txt_inp = open(os.path.join(dir_train, "labels/"+f_name+'.txt'),'r').read().split('\n')[0]