From 136a767018f6ebc26626a1efe98008d79e253c93 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 5 Dec 2019 12:01:54 +0100 Subject: [PATCH 01/12] code to produce models --- .gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 .gitkeep diff --git a/.gitkeep b/.gitkeep new file mode 100644 index 0000000..e69de29 From d4bc814d0474dde3270dfb069a12fd31fa73c3d1 Mon Sep 17 00:00:00 2001 From: b-vr103 Date: Thu, 5 Dec 2019 12:10:55 +0100 Subject: [PATCH 02/12] add files needed for training --- __init__.py | 0 metrics.py | 338 ++++++++++++++++++++++++++++++++++++++++++++++++++++ models.py | 317 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 655 insertions(+) create mode 100644 __init__.py create mode 100644 metrics.py create mode 100644 models.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/metrics.py b/metrics.py new file mode 100644 index 0000000..c63cc22 --- /dev/null +++ b/metrics.py @@ -0,0 +1,338 @@ +from keras import backend as K +import tensorflow as tf +import numpy as np + +def focal_loss(gamma=2., alpha=4.): + + gamma = float(gamma) + alpha = float(alpha) + + def focal_loss_fixed(y_true, y_pred): + """Focal loss for multi-classification + FL(p_t)=-alpha(1-p_t)^{gamma}ln(p_t) + Notice: y_pred is probability after softmax + gradient is d(Fl)/d(p_t) not d(Fl)/d(x) as described in paper + d(Fl)/d(p_t) * [p_t(1-p_t)] = d(Fl)/d(x) + Focal Loss for Dense Object Detection + https://arxiv.org/abs/1708.02002 + + Arguments: + y_true {tensor} -- ground truth labels, shape of [batch_size, num_cls] + y_pred {tensor} -- model's output, shape of [batch_size, num_cls] + + Keyword Arguments: + gamma {float} -- (default: {2.0}) + alpha {float} -- (default: {4.0}) + + Returns: + [tensor] -- loss. + """ + epsilon = 1.e-9 + y_true = tf.convert_to_tensor(y_true, tf.float32) + y_pred = tf.convert_to_tensor(y_pred, tf.float32) + + model_out = tf.add(y_pred, epsilon) + ce = tf.multiply(y_true, -tf.log(model_out)) + weight = tf.multiply(y_true, tf.pow(tf.subtract(1., model_out), gamma)) + fl = tf.multiply(alpha, tf.multiply(weight, ce)) + reduced_fl = tf.reduce_max(fl, axis=1) + return tf.reduce_mean(reduced_fl) + return focal_loss_fixed + +def weighted_categorical_crossentropy(weights=None): + """ weighted_categorical_crossentropy + + Args: + * weights: crossentropy weights + Returns: + * weighted categorical crossentropy function + """ + + def loss(y_true, y_pred): + labels_floats = tf.cast(y_true, tf.float32) + per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred) + + if weights is not None: + weight_mask = tf.maximum(tf.reduce_max(tf.constant( + np.array(weights, dtype=np.float32)[None, None, None]) + * labels_floats, axis=-1), 1.0) + per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] + return tf.reduce_mean(per_pixel_loss) + return loss +def image_categorical_cross_entropy(y_true, y_pred, weights=None): + """ + :param y_true: tensor of shape (batch_size, height, width) representing the ground truth. + :param y_pred: tensor of shape (batch_size, height, width) representing the prediction. + :return: The mean cross-entropy on softmaxed tensors. 
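+    :param weights: optional per-class weights; each pixel's loss is scaled by the weight of its true class (with a floor of 1.0).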
+ """ + + labels_floats = tf.cast(y_true, tf.float32) + per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_floats,logits=y_pred) + + if weights is not None: + weight_mask = tf.maximum( + tf.reduce_max(tf.constant( + np.array(weights, dtype=np.float32)[None, None, None]) + * labels_floats, axis=-1), 1.0) + per_pixel_loss = per_pixel_loss * weight_mask[:, :, :, None] + + return tf.reduce_mean(per_pixel_loss) +def class_tversky(y_true, y_pred): + smooth = 1.0#1.00 + + y_true = K.permute_dimensions(y_true, (3,1,2,0)) + y_pred = K.permute_dimensions(y_pred, (3,1,2,0)) + + y_true_pos = K.batch_flatten(y_true) + y_pred_pos = K.batch_flatten(y_pred) + true_pos = K.sum(y_true_pos * y_pred_pos, 1) + false_neg = K.sum(y_true_pos * (1-y_pred_pos), 1) + false_pos = K.sum((1-y_true_pos)*y_pred_pos, 1) + alpha = 0.2#0.5 + beta=0.8 + return (true_pos + smooth)/(true_pos + alpha*false_neg + (beta)*false_pos + smooth) + +def focal_tversky_loss(y_true,y_pred): + pt_1 = class_tversky(y_true, y_pred) + gamma =1.3#4./3.0#1.3#4.0/3.00# 0.75 + return K.sum(K.pow((1-pt_1), gamma)) + +def generalized_dice_coeff2(y_true, y_pred): + n_el = 1 + for dim in y_true.shape: + n_el *= int(dim) + n_cl = y_true.shape[-1] + w = K.zeros(shape=(n_cl,)) + w = (K.sum(y_true, axis=(0,1,2)))/(n_el) + w = 1/(w**2+0.000001) + numerator = y_true*y_pred + numerator = w*K.sum(numerator,(0,1,2)) + numerator = K.sum(numerator) + denominator = y_true+y_pred + denominator = w*K.sum(denominator,(0,1,2)) + denominator = K.sum(denominator) + return 2*numerator/denominator +def generalized_dice_coeff(y_true, y_pred): + axes = tuple(range(1, len(y_pred.shape)-1)) + Ncl = y_pred.shape[-1] + w = K.zeros(shape=(Ncl,)) + w = K.sum(y_true, axis=axes) + w = 1/(w**2+0.000001) + # Compute gen dice coef: + numerator = y_true*y_pred + numerator = w*K.sum(numerator,axes) + numerator = K.sum(numerator) + + denominator = y_true+y_pred + denominator = w*K.sum(denominator,axes) + denominator = K.sum(denominator) + + gen_dice_coef = 2*numerator/denominator + + return gen_dice_coef + +def generalized_dice_loss(y_true, y_pred): + return 1 - generalized_dice_coeff2(y_true, y_pred) +def soft_dice_loss(y_true, y_pred, epsilon=1e-6): + ''' + Soft dice loss calculation for arbitrary batch size, number of classes, and number of spatial dimensions. + Assumes the `channels_last` format. + + # Arguments + y_true: b x X x Y( x Z...) x c One hot encoding of ground truth + y_pred: b x X x Y( x Z...) x c Network output, must sum to 1 over c channel (such as after softmax) + epsilon: Used for numerical stability to avoid divide by zero errors + + # References + V-Net: Fully Convolutional Neural Networks for Volumetric Medical Image Segmentation + https://arxiv.org/abs/1606.04797 + More details on Dice loss formulation + https://mediatum.ub.tum.de/doc/1395260/1395260.pdf (page 72) + + Adapted from https://github.com/Lasagne/Recipes/issues/99#issuecomment-347775022 + ''' + + # skip the batch and class axis for calculating Dice score + axes = tuple(range(1, len(y_pred.shape)-1)) + + numerator = 2. * K.sum(y_pred * y_true, axes) + + denominator = K.sum(K.square(y_pred) + K.square(y_true), axes) + return 1.00 - K.mean(numerator / (denominator + epsilon)) # average over classes and batch + +def seg_metrics(y_true, y_pred, metric_name, metric_type='standard', drop_last = True, mean_per_class=False, verbose=False): + """ + Compute mean metrics of two segmentation masks, via Keras. 
+ + IoU(A,B) = |A & B| / (| A U B|) + Dice(A,B) = 2*|A & B| / (|A| + |B|) + + Args: + y_true: true masks, one-hot encoded. + y_pred: predicted masks, either softmax outputs, or one-hot encoded. + metric_name: metric to be computed, either 'iou' or 'dice'. + metric_type: one of 'standard' (default), 'soft', 'naive'. + In the standard version, y_pred is one-hot encoded and the mean + is taken only over classes that are present (in y_true or y_pred). + The 'soft' version of the metrics are computed without one-hot + encoding y_pred. + The 'naive' version return mean metrics where absent classes contribute + to the class mean as 1.0 (instead of being dropped from the mean). + drop_last = True: boolean flag to drop last class (usually reserved + for background class in semantic segmentation) + mean_per_class = False: return mean along batch axis for each class. + verbose = False: print intermediate results such as intersection, union + (as number of pixels). + Returns: + IoU/Dice of y_true and y_pred, as a float, unless mean_per_class == True + in which case it returns the per-class metric, averaged over the batch. + + Inputs are B*W*H*N tensors, with + B = batch size, + W = width, + H = height, + N = number of classes + """ + + flag_soft = (metric_type == 'soft') + flag_naive_mean = (metric_type == 'naive') + + # always assume one or more classes + num_classes = K.shape(y_true)[-1] + + if not flag_soft: + # get one-hot encoded masks from y_pred (true masks should already be one-hot) + y_pred = K.one_hot(K.argmax(y_pred), num_classes) + y_true = K.one_hot(K.argmax(y_true), num_classes) + + # if already one-hot, could have skipped above command + # keras uses float32 instead of float64, would give error down (but numpy arrays or keras.to_categorical gives float64) + y_true = K.cast(y_true, 'float32') + y_pred = K.cast(y_pred, 'float32') + + # intersection and union shapes are batch_size * n_classes (values = area in pixels) + axes = (1,2) # W,H axes of each image + intersection = K.sum(K.abs(y_true * y_pred), axis=axes) + mask_sum = K.sum(K.abs(y_true), axis=axes) + K.sum(K.abs(y_pred), axis=axes) + union = mask_sum - intersection # or, np.logical_or(y_pred, y_true) for one-hot + + smooth = .001 + iou = (intersection + smooth) / (union + smooth) + dice = 2 * (intersection + smooth)/(mask_sum + smooth) + + metric = {'iou': iou, 'dice': dice}[metric_name] + + # define mask to be 0 when no pixels are present in either y_true or y_pred, 1 otherwise + mask = K.cast(K.not_equal(union, 0), 'float32') + + if drop_last: + metric = metric[:,:-1] + mask = mask[:,:-1] + + if verbose: + print('intersection, union') + print(K.eval(intersection), K.eval(union)) + print(K.eval(intersection/union)) + + # return mean metrics: remaining axes are (batch, classes) + if flag_naive_mean: + return K.mean(metric) + + # take mean only over non-absent classes + class_count = K.sum(mask, axis=0) + non_zero = tf.greater(class_count, 0) + non_zero_sum = tf.boolean_mask(K.sum(metric * mask, axis=0), non_zero) + non_zero_count = tf.boolean_mask(class_count, non_zero) + + if verbose: + print('Counts of inputs with class present, metrics for non-absent classes') + print(K.eval(class_count), K.eval(non_zero_sum / non_zero_count)) + + return K.mean(non_zero_sum / non_zero_count) + +def mean_iou(y_true, y_pred, **kwargs): + """ + Compute mean Intersection over Union of two segmentation masks, via Keras. + + Calls metrics_k(y_true, y_pred, metric_name='iou'), see there for allowed kwargs. 
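+    (metrics_k refers to seg_metrics above; all keyword arguments are forwarded to it unchanged.)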
+    """
+    return seg_metrics(y_true, y_pred, metric_name='iou', **kwargs)
+def Mean_IOU(y_true, y_pred):
+    nb_classes = K.int_shape(y_pred)[-1]
+    iou = []
+    true_pixels = K.argmax(y_true, axis=-1)
+    pred_pixels = K.argmax(y_pred, axis=-1)
+    void_labels = K.equal(K.sum(y_true, axis=-1), 0)
+    for i in range(0, nb_classes): # loop over all classes; the '& ~void_labels' masking is currently disabled
+        true_labels = K.equal(true_pixels, i)# & ~void_labels
+        pred_labels = K.equal(pred_pixels, i)# & ~void_labels
+        inter = tf.to_int32(true_labels & pred_labels)
+        union = tf.to_int32(true_labels | pred_labels)
+        legal_batches = K.sum(tf.to_int32(true_labels), axis=1)>0
+        ious = K.sum(inter, axis=1)/K.sum(union, axis=1)
+        iou.append(K.mean(tf.gather(ious, indices=tf.where(legal_batches)))) # average IoU over the batches where the class is present
+    iou = tf.stack(iou)
+    legal_labels = ~tf.debugging.is_nan(iou)
+    iou = tf.gather(iou, indices=tf.where(legal_labels))
+    return K.mean(iou)
+
+def iou_vahid(y_true, y_pred):
+    nb_classes = tf.shape(y_true)[-1]+tf.to_int32(1)
+    true_pixels = K.argmax(y_true, axis=-1)
+    pred_pixels = K.argmax(y_pred, axis=-1)
+    iou = []
+
+    for i in tf.range(nb_classes):
+        tp=K.sum( tf.to_int32( K.equal(true_pixels, i) & K.equal(pred_pixels, i) ) )
+        fp=K.sum( tf.to_int32( K.not_equal(true_pixels, i) & K.equal(pred_pixels, i) ) )
+        fn=K.sum( tf.to_int32( K.equal(true_pixels, i) & K.not_equal(pred_pixels, i) ) )
+        iouh=tp/(tp+fp+fn)
+        iou.append(iouh)
+    return K.mean(iou)
+
+
+def IoU_metric(Yi,y_predi):
+    ## mean Intersection over Union
+    ## Mean IoU = TP/(FN + TP + FP)
+    y_predi = np.argmax(y_predi, axis=3)
+    y_testi = np.argmax(Yi, axis=3)
+    IoUs = []
+    Nclass = int(np.max(y_testi)) + 1
+    for c in range(Nclass):
+        TP = np.sum( (y_testi == c)&(y_predi==c) )
+        FP = np.sum( (y_testi != c)&(y_predi==c) )
+        FN = np.sum( (y_testi == c)&(y_predi != c))
+        IoU = TP/float(TP + FP + FN)
+        IoUs.append(IoU)
+    return K.cast( np.mean(IoUs) ,dtype='float32' )
+
+
+def IoU_metric_keras(y_true, y_pred):
+    ## mean Intersection over Union
+    ## Mean IoU = TP/(FN + TP + FP)
+    init = tf.global_variables_initializer()
+    sess = tf.Session()
+    sess.run(init)
+
+    return IoU_metric(y_true.eval(session=sess), y_pred.eval(session=sess))
+
+def jaccard_distance_loss(y_true, y_pred, smooth=100):
+    """
+    Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
+            = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
+
+    The jaccard distance loss is useful for unbalanced datasets. This has been
+    shifted so it converges on 0 and is smoothed to avoid exploding or disappearing
+    gradient.
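+    A minimal usage sketch with a Keras model:
+        model.compile(loss=jaccard_distance_loss, optimizer='adam')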
+ + Ref: https://en.wikipedia.org/wiki/Jaccard_index + + @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96 + @author: wassname + """ + intersection = K.sum(K.abs(y_true * y_pred), axis=-1) + sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1) + jac = (intersection + smooth) / (sum_ - intersection + smooth) + return (1 - jac) * smooth + + diff --git a/models.py b/models.py new file mode 100644 index 0000000..7c806b4 --- /dev/null +++ b/models.py @@ -0,0 +1,317 @@ +from keras.models import * +from keras.layers import * +from keras import layers +from keras.regularizers import l2 + +resnet50_Weights_path='./pretrained_model/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' +IMAGE_ORDERING ='channels_last' +MERGE_AXIS=-1 + + +def one_side_pad( x ): + x = ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING)(x) + if IMAGE_ORDERING == 'channels_first': + x = Lambda(lambda x : x[: , : , :-1 , :-1 ] )(x) + elif IMAGE_ORDERING == 'channels_last': + x = Lambda(lambda x : x[: , :-1 , :-1 , : ] )(x) + return x + +def identity_block(input_tensor, kernel_size, filters, stage, block): + """The identity block is the block that has no conv layer at shortcut. + # Arguments + input_tensor: input tensor + kernel_size: defualt 3, the kernel size of middle conv layer at main path + filters: list of integers, the filterss of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + # Returns + Output tensor for the block. + """ + filters1, filters2, filters3 = filters + + if IMAGE_ORDERING == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = Conv2D(filters1, (1, 1) , data_format=IMAGE_ORDERING , name=conv_name_base + '2a')(input_tensor) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) + x = Activation('relu')(x) + + x = Conv2D(filters2, kernel_size , data_format=IMAGE_ORDERING , + padding='same', name=conv_name_base + '2b')(x) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) + x = Activation('relu')(x) + + x = Conv2D(filters3 , (1, 1), data_format=IMAGE_ORDERING , name=conv_name_base + '2c')(x) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) + + x = layers.add([x, input_tensor]) + x = Activation('relu')(x) + return x + + +def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): + """conv_block is the block that has a conv layer at shortcut + # Arguments + input_tensor: input tensor + kernel_size: defualt 3, the kernel size of middle conv layer at main path + filters: list of integers, the filterss of 3 conv layer at main path + stage: integer, current stage label, used for generating layer names + block: 'a','b'..., current block label, used for generating layer names + # Returns + Output tensor for the block. 
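+        For example, stage 2 of the encoders below starts with
+        conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)).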
+ Note that from stage 3, the first conv layer at main path is with strides=(2,2) + And the shortcut should have strides=(2,2) as well + """ + filters1, filters2, filters3 = filters + + if IMAGE_ORDERING == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + conv_name_base = 'res' + str(stage) + block + '_branch' + bn_name_base = 'bn' + str(stage) + block + '_branch' + + x = Conv2D(filters1, (1, 1) , data_format=IMAGE_ORDERING , strides=strides, + name=conv_name_base + '2a')(input_tensor) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) + x = Activation('relu')(x) + + x = Conv2D(filters2, kernel_size , data_format=IMAGE_ORDERING , padding='same', + name=conv_name_base + '2b')(x) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) + x = Activation('relu')(x) + + x = Conv2D(filters3, (1, 1) , data_format=IMAGE_ORDERING , name=conv_name_base + '2c')(x) + x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) + + shortcut = Conv2D(filters3, (1, 1) , data_format=IMAGE_ORDERING , strides=strides, + name=conv_name_base + '1')(input_tensor) + shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) + + x = layers.add([x, shortcut]) + x = Activation('relu')(x) + return x + + +def resnet50_unet_light(n_classes,input_height=224,input_width=224,weight_decay=1e-6,pretraining=False): + assert input_height%32 == 0 + assert input_width%32 == 0 + + + img_input = Input(shape=(input_height,input_width , 3 )) + + if IMAGE_ORDERING == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + x = ZeroPadding2D((3, 3), data_format=IMAGE_ORDERING)(img_input) + x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, strides=(2, 2),kernel_regularizer=l2(weight_decay), name='conv1')(x) + f1 = x + + x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) + x = Activation('relu')(x) + x = MaxPooling2D((3, 3) , data_format=IMAGE_ORDERING , strides=(2, 2))(x) + + + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') + x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') + f2 = one_side_pad(x ) + + + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') + f3 = x + + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') + f4 = x + + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') + f5 = x + + + if pretraining: + model=Model( img_input , x ).load_weights(resnet50_Weights_path) + + + v512_2048 = Conv2D( 512 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f5 ) + v512_2048 = ( BatchNormalization(axis=bn_axis))(v512_2048) + v512_2048 = Activation('relu')(v512_2048) + + + + v512_1024=Conv2D( 512 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f4 ) + v512_1024 = ( BatchNormalization(axis=bn_axis))(v512_1024) + v512_1024 = 
Activation('relu')(v512_1024) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(v512_2048) + o = ( concatenate([ o ,v512_1024],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) + o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([ o ,f3],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) + o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,f2],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING))(o) + o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay) ) )(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,f1],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) + o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,img_input],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) + o = ( Conv2D( 32 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + + o = Conv2D( n_classes , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( o ) + o = ( BatchNormalization(axis=bn_axis))(o) + o = (Activation('softmax'))(o) + + + model = Model( img_input , o ) + return model + +def resnet50_unet(n_classes,input_height=224,input_width=224,weight_decay=1e-6,pretraining=False): + assert input_height%32 == 0 + assert input_width%32 == 0 + + + img_input = Input(shape=(input_height,input_width , 3 )) + + if IMAGE_ORDERING == 'channels_last': + bn_axis = 3 + else: + bn_axis = 1 + + x = ZeroPadding2D((3, 3), data_format=IMAGE_ORDERING)(img_input) + x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, strides=(2, 2),kernel_regularizer=l2(weight_decay), name='conv1')(x) + f1 = x + + x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) + x = Activation('relu')(x) + x = MaxPooling2D((3, 3) , data_format=IMAGE_ORDERING , strides=(2, 2))(x) + + + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') + x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') + f2 = one_side_pad(x ) + + + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') + f3 = x + + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') + x = 
identity_block(x, 3, [256, 256, 1024], stage=4, block='e') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') + f4 = x + + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') + f5 = x + + if pretraining: + Model( img_input , x ).load_weights(resnet50_Weights_path) + + v1024_2048 = Conv2D( 1024 , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( f5 ) + v1024_2048 = ( BatchNormalization(axis=bn_axis))(v1024_2048) + v1024_2048 = Activation('relu')(v1024_2048) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(v1024_2048) + o = ( concatenate([ o ,f4],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) + o = ( Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([ o ,f3],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D( (1,1), data_format=IMAGE_ORDERING))(o) + o = ( Conv2D( 256, (3, 3), padding='valid', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay)))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,f2],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING))(o) + o = ( Conv2D( 128 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING, kernel_regularizer=l2(weight_decay) ) )(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,f1],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) + o = ( Conv2D( 64 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = ( UpSampling2D( (2,2), data_format=IMAGE_ORDERING))(o) + o = ( concatenate([o,img_input],axis=MERGE_AXIS ) ) + o = ( ZeroPadding2D((1,1) , data_format=IMAGE_ORDERING ))(o) + o = ( Conv2D( 32 , (3, 3), padding='valid' , data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) ))(o) + o = ( BatchNormalization(axis=bn_axis))(o) + o = Activation('relu')(o) + + + o = Conv2D( n_classes , (1, 1) , padding='same', data_format=IMAGE_ORDERING,kernel_regularizer=l2(weight_decay) )( o ) + o = ( BatchNormalization(axis=bn_axis))(o) + o = (Activation('softmax'))(o) + + model = Model( img_input , o ) + + + + + return model From 038d776c2cbd6662c51d5f0cb68705134ef1bc56 Mon Sep 17 00:00:00 2001 From: b-vr103 Date: Thu, 5 Dec 2019 14:05:07 +0100 Subject: [PATCH 03/12] add files needed for training --- README | 23 ++++ config_params.json | 24 ++++ train.py | 192 ++++++++++++++++++++++++++ utils.py | 336 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 575 insertions(+) create mode 100644 README create mode 100644 config_params.json create mode 100644 train.py create mode 100644 utils.py diff --git a/README b/README new file mode 100644 index 0000000..7d8d790 --- /dev/null +++ b/README @@ -0,0 +1,23 @@ +how to train: + just run: python train.py with config_params.json + + +format of ground truth: + + Lables for each pixel is identified by a number . 
So if you have a binary case n_classes should be set to 2 and labels should be 0 and 1 for each class and pixel. + In the case of multiclass just set n_classes to the number of classes you have and the try to produce the labels by pixels from 0 , 1 ,2 .., n_classes-1. + The labels format should be png. + + If you have an image label for binary case it should look like this: + + Label: [ [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]], [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ,[[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ] this means that you have an image by 3*4*3 and pixel[0,0] belongs to class 1 and pixel[0,1] to class 0. + +traing , evaluation and output: + train and evaluation folder should have subfolder of images and labels. + And output folder should be free folder which the output model will be written there. + +patches: + + if you want to train your model with patches, the height and width of patches should be defined and also number of batchs (how many patches should be seen by model by each iteration). + In the case that model should see the image once, like page extraction, the patches should be set to false. + diff --git a/config_params.json b/config_params.json new file mode 100644 index 0000000..52db6db --- /dev/null +++ b/config_params.json @@ -0,0 +1,24 @@ +{ + "n_classes" : 2, + "n_epochs" : 2, + "input_height" : 448, + "input_width" : 896, + "weight_decay" : 1e-6, + "n_batch" : 1, + "learning_rate": 1e-4, + "patches" : true, + "pretraining" : true, + "augmentation" : false, + "flip_aug" : false, + "elastic_aug" : false, + "blur_aug" : false, + "scaling" : false, + "binarization" : false, + "scaling_bluring" : false, + "scaling_binarization" : false, + "rotation": false, + "weighted_loss": true, + "dir_train": "/home/vahid/textline_gt_images/train_light", + "dir_eval": "/home/vahid/textline_gt_images/eval", + "dir_output": "/home/vahid/textline_gt_images/output" +} diff --git a/train.py b/train.py new file mode 100644 index 0000000..07c7418 --- /dev/null +++ b/train.py @@ -0,0 +1,192 @@ +import os +import sys +import tensorflow as tf +from keras.backend.tensorflow_backend import set_session +import keras , warnings +from keras.optimizers import * +from sacred import Experiment +from models import * +from utils import * +from metrics import * + + +def configuration(): + keras.backend.clear_session() + tf.reset_default_graph() + warnings.filterwarnings('ignore') + + os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID' + config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True) + + + config.gpu_options.allow_growth = True + config.gpu_options.per_process_gpu_memory_fraction=0.95#0.95 + config.gpu_options.visible_device_list="0" + set_session(tf.Session(config=config)) + +def get_dirs_or_files(input_data): + if os.path.isdir(input_data): + image_input, labels_input = os.path.join(input_data, 'images/'), os.path.join(input_data, 'labels/') + # Check if training dir exists + assert os.path.isdir(image_input), "{} is not a directory".format(image_input) + assert os.path.isdir(labels_input), "{} is not a directory".format(labels_input) + return image_input, labels_input + +ex = Experiment() + +@ex.config +def config_params(): + n_classes=None # Number of classes. If your case study is binary case the set it to 2 and otherwise give your number of cases. + n_epochs=1 + input_height=224*1 + input_width=224*1 + weight_decay=1e-6 # Weight decay of l2 regularization of model layers. + n_batch=1 # Number of batches at each iteration. 
learning_rate=1e-4
+    patches=False # Divide the image into patches so that all of the image information is used. In the case of page
+    # extraction this should be set to false, since the model should see the whole image at once.
+    augmentation=False
+    flip_aug=False # Flip image (augmentation).
+    elastic_aug=False # Elastic transformation (augmentation; not yet wired into run below).
+    blur_aug=False # Blur patches of image (augmentation).
+    scaling=False # Scaling of patches (augmentation) will be applied if this is set to true.
+    binarization=False # Otsu thresholding (augmentation). Useful for binary cases like textline prediction; it should not be applied to multiclass cases.
+    dir_train=None # Directory of training dataset (sub-folders should be named images and labels).
+    dir_eval=None # Directory of validation dataset (sub-folders should be named images and labels).
+    dir_output=None # Directory of output, where the model will be saved.
+    pretraining=False # Set to true to load pretrained weights of the resnet50 encoder.
+    weighted_loss=False # Set to true if classes are unbalanced and you want to use a weighted loss function.
+    scaling_bluring=False
+    rotation=False # Rotate the image by transposing it (augmentation).
+    scaling_binarization=False
+    blur_k=['blur','guass','median'] # Kinds of blurring applied to the image (augmentation).
+    scales=[0.9 , 1.1 ] # Scale patches with these scales (augmentation).
+    flip_index=[0,1] # cv2.flip codes: 0 flips vertically, 1 flips horizontally (augmentation).
+
+
+@ex.automain
+def run(n_classes,n_epochs,input_height,
+        input_width,weight_decay,weighted_loss,
+        n_batch,patches,augmentation,flip_aug,blur_aug,scaling, binarization,
+        blur_k,scales,dir_train,
+        scaling_bluring,scaling_binarization,rotation,
+        flip_index,dir_eval ,dir_output,pretraining,learning_rate):
+
+    dir_img,dir_seg=get_dirs_or_files(dir_train)
+    dir_img_val,dir_seg_val=get_dirs_or_files(dir_eval)
+
+    # first make directories inside the output directory, for both training and evaluation, to flow data from.
+    dir_train_flowing=os.path.join(dir_output,'train')
+    dir_eval_flowing=os.path.join(dir_output,'eval')
+
+    dir_flow_train_imgs=os.path.join(dir_train_flowing,'images')
+    dir_flow_train_labels=os.path.join(dir_train_flowing,'labels')
+
+    dir_flow_eval_imgs=os.path.join(dir_eval_flowing,'images')
+    dir_flow_eval_labels=os.path.join(dir_eval_flowing,'labels')
+
+    if os.path.isdir(dir_train_flowing):
+        os.system('rm -rf '+dir_train_flowing)
+    os.makedirs(dir_train_flowing)
+
+    if os.path.isdir(dir_eval_flowing):
+        os.system('rm -rf '+dir_eval_flowing)
+    os.makedirs(dir_eval_flowing)
+
+    os.mkdir(dir_flow_train_imgs)
+    os.mkdir(dir_flow_train_labels)
+
+    os.mkdir(dir_flow_eval_imgs)
+    os.mkdir(dir_flow_eval_labels)
+
+    # set the gpu configuration
+    configuration()
+
+    # write patches into sub-folders, so they can be flowed from directory.
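+    # provide_patches writes the (optionally augmented) images and labels as
+    # individual PNG files into the flowing directories created above.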
+ provide_patches(dir_img,dir_seg,dir_flow_train_imgs, + dir_flow_train_labels, + input_height,input_width,blur_k,blur_aug, + flip_aug,binarization,scaling,scales,flip_index, + scaling_bluring,scaling_binarization,rotation, + augmentation=augmentation,patches=patches) + + provide_patches(dir_img_val,dir_seg_val,dir_flow_eval_imgs, + dir_flow_eval_labels, + input_height,input_width,blur_k,blur_aug, + flip_aug,binarization,scaling,scales,flip_index, + scaling_bluring,scaling_binarization,rotation, + augmentation=False,patches=patches) + + if weighted_loss: + weights=np.zeros(n_classes) + for obj in os.listdir(dir_seg): + label_obj=cv2.imread(dir_seg+'/'+obj) + label_obj_one_hot=get_one_hot( label_obj,label_obj.shape[0],label_obj.shape[1],n_classes) + weights+=(label_obj_one_hot.sum(axis=0)).sum(axis=0) + + + weights=1.00/weights + + weights=weights/float(np.sum(weights)) + weights=weights/float(np.min(weights)) + weights=weights/float(np.sum(weights)) + + + + + #get our model. + model = resnet50_unet(n_classes, input_height, input_width,weight_decay,pretraining) + + #if you want to see the model structure just uncomment model summary. + #model.summary() + + + if not weighted_loss: + model.compile(loss='categorical_crossentropy', + optimizer = Adam(lr=learning_rate),metrics=['accuracy']) + if weighted_loss: + model.compile(loss=weighted_categorical_crossentropy(weights), + optimizer = Adam(lr=learning_rate),metrics=['accuracy']) + + mc = keras.callbacks.ModelCheckpoint('weights{epoch:08d}.h5', + save_weights_only=True, period=1) + + + #generating train and evaluation data + train_gen = data_gen(dir_flow_train_imgs,dir_flow_train_labels, batch_size = n_batch, + input_height=input_height, input_width=input_width,n_classes=n_classes ) + val_gen = data_gen(dir_flow_eval_imgs,dir_flow_eval_labels, batch_size = n_batch, + input_height=input_height, input_width=input_width,n_classes=n_classes ) + + + model.fit_generator( + train_gen, + steps_per_epoch=int(len(os.listdir(dir_flow_train_imgs))/n_batch), + validation_data=val_gen, + validation_steps=1, + epochs=n_epochs) + + + + os.system('rm -rf '+dir_train_flowing) + os.system('rm -rf '+dir_eval_flowing) + + model.save(dir_output+'/'+'model'+'.h5') + + + + + + + + + + diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..afdc9e5 --- /dev/null +++ b/utils.py @@ -0,0 +1,336 @@ +import os +import cv2 +import numpy as np +import seaborn as sns +from scipy.ndimage.interpolation import map_coordinates +from scipy.ndimage.filters import gaussian_filter +import random +from tqdm import tqdm + + + + +def bluring(img_in,kind): + if kind=='guass': + img_blur = cv2.GaussianBlur(img_in,(5,5),0) + elif kind=="median": + img_blur = cv2.medianBlur(img_in,5) + elif kind=='blur': + img_blur=cv2.blur(img_in,(5,5)) + return img_blur + +def color_images(seg, n_classes): + ann_u=range(n_classes) + if len(np.shape(seg))==3: + seg=seg[:,:,0] + + seg_img=np.zeros((np.shape(seg)[0],np.shape(seg)[1],3)).astype(float) + colors=sns.color_palette("hls", n_classes) + + for c in ann_u: + c=int(c) + segl=(seg==c) + seg_img[:,:,0]+=segl*(colors[c][0]) + seg_img[:,:,1]+=segl*(colors[c][1]) + seg_img[:,:,2]+=segl*(colors[c][2]) + return seg_img + + +def resize_image(seg_in,input_height,input_width): + return cv2.resize(seg_in,(input_width,input_height),interpolation=cv2.INTER_NEAREST) +def get_one_hot(seg,input_height,input_width,n_classes): + seg=seg[:,:,0] + seg_f=np.zeros((input_height, input_width,n_classes)) + for j in range(n_classes): + 
seg_f[:,:,j]=(seg==j).astype(int) # channel j is a binary mask of the pixels labelled with class j
+    return seg_f
+
+
+def IoU(Yi,y_predi):
+    ## mean Intersection over Union
+    ## Mean IoU = TP/(FN + TP + FP)
+
+    IoUs = []
+    classes_true=np.unique(Yi)
+    for c in classes_true:
+        TP = np.sum( (Yi == c)&(y_predi==c) )
+        FP = np.sum( (Yi != c)&(y_predi==c) )
+        FN = np.sum( (Yi == c)&(y_predi != c))
+        IoU = TP/float(TP + FP + FN)
+        print("class {:02.0f}: #TP={:6.0f}, #FP={:6.0f}, #FN={:5.0f}, IoU={:4.3f}".format(c,TP,FP,FN,IoU))
+        IoUs.append(IoU)
+    mIoU = np.mean(IoUs)
+    print("_________________")
+    print("Mean IoU: {:4.3f}".format(mIoU))
+    return mIoU
+def data_gen(img_folder, mask_folder, batch_size,input_height, input_width,n_classes):
+    c = 0
+    n = os.listdir(img_folder) # list of training images
+    random.shuffle(n)
+    while True:
+        img = np.zeros((batch_size, input_height, input_width, 3)).astype('float')
+        mask = np.zeros((batch_size, input_height, input_width, n_classes)).astype('float')
+
+        for i in range(c, c+batch_size): # fill one batch, i.e. images n[c] .. n[c+batch_size-1]
+            filename=n[i].split('.')[0]
+            train_img = cv2.imread(img_folder+'/'+n[i])/255.
+            train_img = cv2.resize(train_img, (input_width, input_height),interpolation=cv2.INTER_NEAREST) # read an image from the folder and resize
+
+            img[i-c] = train_img # add to array - img[0], img[1], and so on.
+            train_mask = cv2.imread(mask_folder+'/'+filename+'.png')
+            train_mask = get_one_hot( resize_image(train_mask,input_height,input_width),input_height,input_width,n_classes)
+
+            mask[i-c] = train_mask
+
+        c+=batch_size
+        if(c+batch_size>=len(os.listdir(img_folder))):
+            c=0
+            random.shuffle(n)
+        yield img, mask
+
+def otsu_copy(img):
+    img_r=np.zeros(img.shape)
+    img1=img[:,:,0]
+    img2=img[:,:,1]
+    img3=img[:,:,2]
+    _, threshold1 = cv2.threshold(img1, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
+    _, threshold2 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
+    _, threshold3 = cv2.threshold(img3, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
+    # binarize each channel with its own Otsu threshold
+    img_r[:,:,0]=threshold1
+    img_r[:,:,1]=threshold2
+    img_r[:,:,2]=threshold3
+    return img_r
+
+def rotation_90(img):
+    # transpose each channel, i.e. a 90 degree rotation combined with a mirror
+    img_rot=np.zeros((img.shape[1],img.shape[0],img.shape[2]))
+    img_rot[:,:,0]=img[:,:,0].T
+    img_rot[:,:,1]=img[:,:,1].T
+    img_rot[:,:,2]=img[:,:,2].T
+    return img_rot
+
+def get_patches(dir_img_f,dir_seg_f,img,label,height,width,indexer):
+    # slide a height x width window over the image; windows that would cross
+    # the image border are shifted inwards so every patch has the full size.
+    img_h=img.shape[0]
+    img_w=img.shape[1]
+
+    nxf=img_w/float(width)
+    nyf=img_h/float(height)
+
+    if nxf>int(nxf):
+        nxf=int(nxf)+1
+    if nyf>int(nyf):
+        nyf=int(nyf)+1
+
+    nxf=int(nxf)
+    nyf=int(nyf)
+
+    for i in range(nxf):
+        for j in range(nyf):
+            index_x_d=i*width
+            index_x_u=(i+1)*width
+
+            index_y_d=j*height
+            index_y_u=(j+1)*height
+
+            if index_x_u>img_w:
+                index_x_u=img_w
+                index_x_d=img_w-width
+            if index_y_u>img_h:
+                index_y_u=img_h
+                index_y_d=img_h-height
+
+            img_patch=img[index_y_d:index_y_u,index_x_d:index_x_u,:]
+            label_patch=label[index_y_d:index_y_u,index_x_d:index_x_u,:]
+
+            cv2.imwrite(dir_img_f+'/img_'+str(indexer)+'.png', img_patch )
+            cv2.imwrite(dir_seg_f+'/img_'+str(indexer)+'.png' , label_patch )
+            indexer+=1
+    return indexer
+
+
+
+def get_patches_num_scale(dir_img_f,dir_seg_f,img,label,height,width,indexer,scaler):
+    # same sliding window as get_patches, but the window is scaled by scaler
+    # and every patch is resized back to height x width before writing.
+    img_h=img.shape[0]
+    img_w=img.shape[1]
+
+    height_scale=int(height*scaler)
+    width_scale=int(width*scaler)
+
+    nxf=img_w/float(width_scale)
nyf=img_h/float(height_scale) + + if nxf>int(nxf): + nxf=int(nxf)+1 + if nyf>int(nyf): + nyf=int(nyf)+1 + + nxf=int(nxf) + nyf=int(nyf) + + for i in range(nxf): + for j in range(nyf): + index_x_d=i*width_scale + index_x_u=(i+1)*width_scale + + index_y_d=j*height_scale + index_y_u=(j+1)*height_scale + + if index_x_u>img_w: + index_x_u=img_w + index_x_d=img_w-width_scale + if index_y_u>img_h: + index_y_u=img_h + index_y_d=img_h-height_scale + + + img_patch=img[index_y_d:index_y_u,index_x_d:index_x_u,:] + label_patch=label[index_y_d:index_y_u,index_x_d:index_x_u,:] + + img_patch=resize_image(img_patch,height,width) + label_patch=resize_image(label_patch,height,width) + + cv2.imwrite(dir_img_f+'/img_'+str(indexer)+'.png', img_patch ) + cv2.imwrite(dir_seg_f+'/img_'+str(indexer)+'.png' , label_patch ) + indexer+=1 + + return indexer + + + +def provide_patches(dir_img,dir_seg,dir_flow_train_imgs, + dir_flow_train_labels, + input_height,input_width,blur_k,blur_aug, + flip_aug,binarization,scaling,scales,flip_index, + scaling_bluring,scaling_binarization,rotation, + augmentation=False,patches=False): + + imgs_cv_train=np.array(os.listdir(dir_img)) + segs_cv_train=np.array(os.listdir(dir_seg)) + + indexer=0 + for im, seg_i in tqdm(zip(imgs_cv_train,segs_cv_train)): + img_name=im.split('.')[0] + + if not patches: + cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', resize_image(cv2.imread(dir_img+'/'+im),input_height,input_width ) ) + cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' , resize_image(cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width ) ) + indexer+=1 + + if augmentation: + if rotation: + cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', + rotation_90( resize_image(cv2.imread(dir_img+'/'+im), + input_height,input_width) ) ) + + + cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png', + rotation_90 ( resize_image(cv2.imread(dir_seg+'/'+img_name+'.png'), + input_height,input_width) ) ) + indexer+=1 + + if flip_aug: + for f_i in flip_index: + cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', + resize_image(cv2.flip(cv2.imread(dir_img+'/'+im),f_i),input_height,input_width) ) + + cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' , + resize_image(cv2.flip(cv2.imread(dir_seg+'/'+img_name+'.png'),f_i),input_height,input_width) ) + indexer+=1 + + if blur_aug: + for blur_i in blur_k: + cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', + (resize_image(bluring(cv2.imread(dir_img+'/'+im),blur_i),input_height,input_width) ) ) + + cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png' , + resize_image(cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width) ) + indexer+=1 + + + if binarization: + cv2.imwrite(dir_flow_train_imgs+'/img_'+str(indexer)+'.png', + resize_image(otsu_copy( cv2.imread(dir_img+'/'+im)),input_height,input_width )) + + cv2.imwrite(dir_flow_train_labels+'/img_'+str(indexer)+'.png', + resize_image( cv2.imread(dir_seg+'/'+img_name+'.png'),input_height,input_width )) + indexer+=1 + + + + + + + if patches: + + indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, + cv2.imread(dir_img+'/'+im),cv2.imread(dir_seg+'/'+img_name+'.png'), + input_height,input_width,indexer=indexer) + + if augmentation: + + if rotation: + + + indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, + rotation_90( cv2.imread(dir_img+'/'+im) ), + rotation_90( cv2.imread(dir_seg+'/'+img_name+'.png') ), + input_height,input_width,indexer=indexer) + if flip_aug: + for f_i in flip_index: + + 
indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, + cv2.flip( cv2.imread(dir_img+'/'+im) , f_i), + cv2.flip( cv2.imread(dir_seg+'/'+img_name+'.png') ,f_i), + input_height,input_width,indexer=indexer) + if blur_aug: + for blur_i in blur_k: + indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, + bluring( cv2.imread(dir_img+'/'+im) , blur_i), + cv2.imread(dir_seg+'/'+img_name+'.png'), + input_height,input_width,indexer=indexer) + + + if scaling: + for sc_ind in scales: + indexer=get_patches_num_scale(dir_flow_train_imgs,dir_flow_train_labels, + cv2.imread(dir_img+'/'+im) , + cv2.imread(dir_seg+'/'+img_name+'.png'), + input_height,input_width,indexer=indexer,scaler=sc_ind) + if binarization: + + indexer=get_patches(dir_flow_train_imgs,dir_flow_train_labels, + otsu_copy( cv2.imread(dir_img+'/'+im)), + cv2.imread(dir_seg+'/'+img_name+'.png'), + input_height,input_width,indexer=indexer) + + + + if scaling_bluring: + for sc_ind in scales: + for blur_i in blur_k: + indexer=get_patches_num_scale(dir_flow_train_imgs,dir_flow_train_labels, + bluring( cv2.imread(dir_img+'/'+im) , blur_i) , + cv2.imread(dir_seg+'/'+img_name+'.png') , + input_height,input_width,indexer=indexer,scaler=sc_ind) + + if scaling_binarization: + for sc_ind in scales: + indexer=get_patches_num_scale(dir_flow_train_imgs,dir_flow_train_labels, + otsu_copy( cv2.imread(dir_img+'/'+im)) , + cv2.imread(dir_seg+'/'+img_name+'.png'), + input_height,input_width,indexer=indexer,scaler=sc_ind) + + + + + + From cd1990d4f93feda23deceba0650934948600db00 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 5 Dec 2019 14:05:55 +0100 Subject: [PATCH 04/12] Update config_params.json --- config_params.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config_params.json b/config_params.json index 52db6db..5066444 100644 --- a/config_params.json +++ b/config_params.json @@ -18,7 +18,7 @@ "scaling_binarization" : false, "rotation": false, "weighted_loss": true, - "dir_train": "/home/vahid/textline_gt_images/train_light", - "dir_eval": "/home/vahid/textline_gt_images/eval", - "dir_output": "/home/vahid/textline_gt_images/output" + "dir_train": "../train", + "dir_eval": "../eval", + "dir_output": "../output" } From a216dccfcfa1c9541508a766e2f5dee21e7065d1 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 5 Dec 2019 14:08:08 +0100 Subject: [PATCH 05/12] Update README --- README | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README b/README index 7d8d790..8d478bd 100644 --- a/README +++ b/README @@ -4,17 +4,20 @@ how to train: format of ground truth: - Lables for each pixel is identified by a number . So if you have a binary case n_classes should be set to 2 and labels should be 0 and 1 for each class and pixel. - In the case of multiclass just set n_classes to the number of classes you have and the try to produce the labels by pixels from 0 , 1 ,2 .., n_classes-1. + Lables for each pixel is identified by a number . So if you have a binary case n_classes should be set to 2 and + labels should be 0 and 1 for each class and pixel. + In the case of multiclass just set n_classes to the number of classes you have and the try to produce the labels + by pixels set from 0 , 1 ,2 .., n_classes-1. The labels format should be png. 
If you have an image label for binary case it should look like this: - Label: [ [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]], [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ,[[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ] this means that you have an image by 3*4*3 and pixel[0,0] belongs to class 1 and pixel[0,1] to class 0. + Label: [ [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]], [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ,[[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ] + this means that you have an image by 3*4*3 and pixel[0,0] belongs to class 1 and pixel[0,1] to class 0. -traing , evaluation and output: +training , evaluation and output: train and evaluation folder should have subfolder of images and labels. - And output folder should be free folder which the output model will be written there. + And output folder should be empty folder which the output model will be written there. patches: From 036e2e9de5ad0ddbce4adddc6d8f8f4a24bc838f Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 5 Dec 2019 14:11:37 +0100 Subject: [PATCH 06/12] Update README --- README | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README b/README index 8d478bd..54ea408 100644 --- a/README +++ b/README @@ -21,6 +21,7 @@ training , evaluation and output: patches: - if you want to train your model with patches, the height and width of patches should be defined and also number of batchs (how many patches should be seen by model by each iteration). + if you want to train your model with patches, the height and width of patches should be defined and also number of + batchs (how many patches should be seen by model by each iteration). In the case that model should see the image once, like page extraction, the patches should be set to false. From f7a5a57245c8978be65cf52e099075c54fd762a2 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 5 Dec 2019 16:11:31 +0100 Subject: [PATCH 07/12] Update README --- README | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README b/README index 54ea408..e103b0b 100644 --- a/README +++ b/README @@ -1,8 +1,8 @@ -how to train: +# Train just run: python train.py with config_params.json -format of ground truth: +# Ground truth format Lables for each pixel is identified by a number . So if you have a binary case n_classes should be set to 2 and labels should be 0 and 1 for each class and pixel. @@ -15,11 +15,11 @@ format of ground truth: Label: [ [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]], [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ,[[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ] this means that you have an image by 3*4*3 and pixel[0,0] belongs to class 1 and pixel[0,1] to class 0. -training , evaluation and output: +# Training , evaluation and output train and evaluation folder should have subfolder of images and labels. And output folder should be empty folder which the output model will be written there. -patches: +# Patches if you want to train your model with patches, the height and width of patches should be defined and also number of batchs (how many patches should be seen by model by each iteration). From bbe6f99a85fa8cb164442d3d685897cc2a1612b0 Mon Sep 17 00:00:00 2001 From: "Rezanezhad, Vahid" Date: Thu, 5 Dec 2019 16:13:37 +0100 Subject: [PATCH 08/12] Update README --- README | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/README b/README index e103b0b..5237d53 100644 --- a/README +++ b/README @@ -1,27 +1,2 @@ -# Train - just run: python train.py with config_params.json - - -# Ground truth format - - Lables for each pixel is identified by a number . 
So if you have a binary case n_classes should be set to 2 and
-    labels should be 0 and 1 for each class and pixel.
-    In the case of multiclass just set n_classes to the number of classes you have and the try to produce the labels
-    by pixels set from 0 , 1 ,2 .., n_classes-1.
-    The labels format should be png.
-
-    If you have an image label for binary case it should look like this:
-
-    Label: [ [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]], [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ,[[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ]
-    this means that you have an image by 3*4*3 and pixel[0,0] belongs to class 1 and pixel[0,1] to class 0.
-
-# Training , evaluation and output
-    train and evaluation folder should have subfolder of images and labels.
-    And output folder should be empty folder which the output model will be written there.
-
-# Patches
-
-    if you want to train your model with patches, the height and width of patches should be defined and also number of
-    batchs (how many patches should be seen by model by each iteration).
-    In the case that model should see the image once, like page extraction, the patches should be set to false.
+
 

From e4013feb91695cf7fee37e5d4e0574cff2cb8340 Mon Sep 17 00:00:00 2001
From: "Rezanezhad, Vahid"
Date: Thu, 5 Dec 2019 16:13:40 +0100
Subject: [PATCH 09/12] Delete README

---
 README | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100644 README

diff --git a/README b/README
deleted file mode 100644
index 5237d53..0000000
--- a/README
+++ /dev/null
@@ -1,2 +0,0 @@
-
-

From f69d4459cbd542db3b4a613283b255aa75f9070f Mon Sep 17 00:00:00 2001
From: "Rezanezhad, Vahid"
Date: Thu, 5 Dec 2019 16:14:00 +0100
Subject: [PATCH 10/12] Add new file

---
 README.md | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c4dc27e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,26 @@
+# Train
+    just run: python train.py with config_params.json
+
+
+# Ground truth format
+
+    Labels for each pixel are identified by a number. So if you have a binary case, n_classes should be set to 2 and
+    labels should be 0 and 1 for each class and pixel.
+    In the case of multiclass, just set n_classes to the number of classes you have and try to produce the labels
+    by pixel values from 0, 1, 2, ..., n_classes-1.
+    The label format should be png.
+
+    If you have an image label for the binary case, it should look like this:
+
+    Label: [ [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]], [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ,[[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ]
+    this means that you have an image of shape 3*4*3 and pixel[0,0] belongs to class 1 and pixel[0,1] to class 0.
+
+# Training, evaluation and output
+    The train and evaluation folders should each have subfolders named images and labels.
+    The output folder should be an empty folder where the output model will be written.
+
+# Patches
+
+    If you want to train your model with patches, the height and width of patches should be defined and also the number of
+    batches (how many patches the model sees in each iteration).
+    If the model should see the whole image at once, as in page extraction, patches should be set to false.
\ No newline at end of file

From 0cddfff04f71e24ad029244123fb109ea5a16ee1 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Tue, 10 Dec 2019 11:57:37 +0100
Subject: [PATCH 11/12] Update README

---
 README.md | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index c4dc27e..16e5dce 100644
--- a/README.md
+++ b/README.md
@@ -4,16 +4,21 @@
 
 # Ground truth format
 
-    Labels for each pixel are identified by a number. So if you have a binary case, n_classes should be set to 2 and
+    Labels for each pixel are identified by a number. So if you have a
+    binary case, n_classes should be set to 2 and
     labels should be 0 and 1 for each class and pixel.
-    In the case of multiclass, just set n_classes to the number of classes you have and try to produce the labels
+    In the case of multiclass, just set n_classes to the number of classes
+    you have and try to produce the labels
     by pixel values from 0, 1, 2, ..., n_classes-1.
     The label format should be png.
 
     If you have an image label for the binary case, it should look like this:
 
-    Label: [ [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]], [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ,[[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ]
-    this means that you have an image of shape 3*4*3 and pixel[0,0] belongs to class 1 and pixel[0,1] to class 0.
+    Label: [ [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]],
+             [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ,
+             [[1 0 0 1], [1 0 0 1] ,[1 0 0 1]] ]
+    This means that you have an image of shape 3*4*3 and pixel[0,0] belongs
+    to class 1 and pixel[0,1] to class 0.
 
 # Training, evaluation and output
     The train and evaluation folders should each have subfolders named images and labels.
@@ -21,6 +26,11 @@
 
 # Patches
 
-    If you want to train your model with patches, the height and width of patches should be defined and also the number of
+    If you want to train your model with patches, the height and width of
+    patches should be defined and also the number of
     batches (how many patches the model sees in each iteration).
-    If the model should see the whole image at once, as in page extraction, patches should be set to false.
\ No newline at end of file
+    If the model should see the whole image at once, as in page extraction,
+    patches should be set to false.
+# Pretrained encoder
+Download weights from this limk and add them to the pretrained_model folder.
+https://file.spk-berlin.de:8443/pretrained_encoder/

From c5e1e2dda7542c6d8a9787fa496b538ce8519794 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Tue, 10 Dec 2019 11:58:02 +0100
Subject: [PATCH 12/12] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 16e5dce..3ba90a1 100644
--- a/README.md
+++ b/README.md
@@ -32,5 +32,5 @@
     If the model should see the whole image at once, as in page extraction,
     patches should be set to false.
 # Pretrained encoder
-Download weights from this limk and add them to the pretrained_model folder.
+Download weights from this link and add them to the pretrained_model folder.
 https://file.spk-berlin.de:8443/pretrained_encoder/
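
Example usage (a minimal sketch, not part of the patches above): it shows how a
ground-truth label PNG in the format described in README.md could be produced,
and how the model saved by train.py could be applied to a new image. The file
paths are hypothetical, and the same Keras/TF 1.x environment as the code above
is assumed.

    import cv2
    import numpy as np
    from keras.models import load_model

    # Build a binary label image: class 1 inside a rectangle, class 0 elsewhere.
    # get_one_hot() in utils.py reads class ids from channel 0 of a 3-channel
    # PNG, so the same value is written into all three channels.
    label = np.zeros((448, 896, 3), dtype=np.uint8)
    label[100:200, 300:500, :] = 1
    cv2.imwrite('labels/example.png', label)               # hypothetical path

    # Load the trained model; compile=False avoids having to re-register the
    # custom weighted loss from metrics.py at load time.
    model = load_model('output/model.h5', compile=False)   # hypothetical path

    # Predict on one image, preprocessed the same way as in data_gen():
    # scale to [0, 1] and resize to (input_width, input_height) = (896, 448).
    img = cv2.imread('images/example.png') / 255.
    img = cv2.resize(img, (896, 448), interpolation=cv2.INTER_NEAREST)
    pred = model.predict(np.expand_dims(img, axis=0))
    mask = np.argmax(pred[0], axis=-1)                     # per-pixel class ids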