Second version of the RAFNI algorithm
AnabelGRios committed May 18, 2022
1 parent ab87daa commit 39d0a2e
Showing 9 changed files with 409 additions and 140 deletions.
63 changes: 63 additions & 0 deletions CNND2L.py
@@ -0,0 +1,63 @@

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import BatchNormalization, Activation, Conv2D
from tensorflow.keras.layers import MaxPooling2D, Flatten, Dense
from tensorflow.keras.regularizers import l2
import numpy as np

# def eight_layer(input_tensor = None, input_shape = None, num_classes = 10):
def eight_layer(num_classes):
# if input_tensor is None:
# img_input = tf.keras.Input(shape = input_shape)
# else:
# if not K.is_keras_tensor(input_shape):
# img_input = tf.keras.Input(tensor = input_tensor, shape = input_shape)
# else:
# img_input = input_tensor
img_input = tf.keras.Input(shape = (32, 32, 3))

# Block 1
x = Conv2D(64, (3, 3), padding = 'same', kernel_initializer = 'he_normal',
name = 'block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(64, (3, 3), padding = 'same', kernel_initializer = 'he_normal',
name = 'block1_conv2')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((2, 2), strides = (2, 2), name = 'block1_pool')(x)

# Block 2
x = Conv2D(128, (3, 3), padding = 'same', kernel_initializer = 'he_normal',
name = 'block2_conv1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(128, (3, 3), padding = 'same', kernel_initializer = 'he_normal',
name = 'block2_conv2')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((2, 2), strides = (2, 2), name = 'block2_pool')(x)

# Block 3
x = Conv2D(196, (3, 3), padding = 'same', kernel_initializer = 'he_normal',
name = 'block3_conv1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(196, (3, 3), padding = 'same', kernel_initializer = 'he_normal',
name = 'block3_conv2')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((2, 2), strides = (2, 2), name = 'block3_pool')(x)

x = Flatten(name = 'flatten')(x)

x = Dense(256, kernel_initializer = 'he_normal', kernel_regularizer = l2(0.01),
bias_regularizer = l2(0.01), name = 'fc1')(x)
x = BatchNormalization()(x)
x = Activation('relu', name = 'lid')(x)

x = Dense(num_classes, kernel_initializer = 'he_normal')(x)
x = Activation('softmax')(x)

return tf.keras.Model(inputs = img_input, outputs = x)
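
A hypothetical usage sketch for the eight-layer CNN above (not part of this commit; the optimizer, loss and metric choices are assumptions for illustration):

import tensorflow as tf
from CNND2L import eight_layer

# Build the model for a 10-class problem such as CIFAR-10 and compile it.
model = eight_layer(num_classes = 10)
model.compile(optimizer = tf.keras.optimizers.SGD(learning_rate = 0.1, momentum = 0.9),
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])
model.summary()  # expects (32, 32, 3) inputs, softmax output over 10 classes

Note that fit() in this commit drives training with its own GradientTape loop and the optimizer passed to it, so compile() here only serves quick standalone experiments.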
54 changes: 54 additions & 0 deletions densenet.py
@@ -0,0 +1,54 @@
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization, Activation, Conv2D, Dropout
from tensorflow.keras.layers import AveragePooling2D, GlobalAveragePooling2D, Dense
import numpy as np

def conv(input, filters, stride):
return Conv2D(filters, (3, 3), stride, padding = 'same', use_bias = False,
kernel_initializer = tf.keras.initializers.RandomNormal(
stddev = np.sqrt(2.0 / 9 / filters)))(input)

def add_layer(input, growthRate):
x = BatchNormalization(momentum = 0.9, epsilon = 1e-05)(input)
x = Activation('relu')(x)
x = conv(x, growthRate, (1, 1))
x = Dropout(rate = 0.2)(x)
return tf.concat([input, x], 3)

def add_transition(input):
filters = input.shape[3]
x = BatchNormalization(momentum = 0.9, epsilon = 1e-05)(input)
x = Activation('relu')(x)
x = Conv2D(filters, (1, 1), strides = (1, 1), use_bias = False, padding = 'same')(x)
x = Activation('relu')(x)
x = Dropout(rate = 0.2)(x)
x = AveragePooling2D((2, 2), strides = (2, 2))(x)
return x

def densenet(depth, growthRate, num_classes):
N = int((depth-4)/3)

input = tf.keras.Input(shape = (32, 32, 3))
x = conv(input, 16, (1, 1))

# Block 1
for i in range(N):
x = add_layer(x, growthRate)
x = add_transition(x)

# Block 2
for i in range(N):
x = add_layer(x, growthRate)
x = add_transition(x)

# Block 3
for i in range(N):
x = add_layer(x, growthRate)

x = BatchNormalization(momentum = 0.9, epsilon = 1e-05)(x)
x = Activation('relu')(x)
x = GlobalAveragePooling2D()(x)
output = Dense(num_classes, activation = 'softmax',
kernel_initializer = tf.keras.initializers.VarianceScaling(2.0))(x)

return tf.keras.Model(inputs = input, outputs = output)
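
A hypothetical usage sketch for the DenseNet builder above (not part of this commit). Depth 40 with growth rate 12 is the usual CIFAR configuration; depth should satisfy (depth - 4) % 3 == 0 so that N, the number of dense layers per block, divides evenly:

from densenet import densenet

# 3 dense blocks of N = (40 - 4) / 3 = 12 layers each, growth rate 12.
model = densenet(depth = 40, growthRate = 12, num_classes = 10)
model.summary()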
2 changes: 1 addition & 1 deletion dictionaries.py
@@ -8,7 +8,7 @@
from collections import deque

def add_change_in_dict(dict, key, new_class):
# The next line create a new item in the dictionary if key does not
# The next line creates a new item in the dictionary if key does not
# exist in dict and changes its value if it exists.
# At this point, key and new_class are not tensors.
dict[key] = new_class
72 changes: 55 additions & 17 deletions fit.py
100755 → 100644
@@ -6,6 +6,13 @@
from dictionaries import *
from load_data import *
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statistics import NormalDist
import os
import tempfile
import scipy.stats as stats

# https://tensorflow.google.cn/guide/keras/train_and_evaluate?hl=zh-cn#part_ii_writing_your_own_training_evaluation_loops_from_scratch
'''
@@ -16,30 +23,38 @@
optimizer: optimizer to use. Instance of Keras Optimizer.
epochs: number of epochs to train. Integer.
global_batch_size: global batch size used during training. Integer.
prob_threshold: probability threshold to use in the relabelling mechanism. Float.
quantile_prob: quantile to use for the probability threshold in the relabelling mechanism. Float.
changes_dict: dictionary containing the changes in the training set. Python dictionary.
removals_dict: dictionary containing the removals in the training set. Python dictionary.
epoch_threshold: epoch threshold so that there is no change in the training set before
that threshold and there is no removal before 1.5*epoch_threshold. Integer.
record_dict: dictionary containing the last predictions of the instances. Python dictionary.
not_change_dict: dictionary containing the instances that cannot be changed. Python dictionary.
record_length: length of the record dictionary. Integer.
not_change_epochs: number of epochs after a change during which it is not possible
to change the label of that instance again or remove it. Integer.
quantile_loss: quantile to use for the loss threshold in the filtering mechanism. Float.
'''
def fit(model, train_dataset, optimizer, epochs, global_batch_size,
prob_threshold, changes_dict, removals_dict,
epoch_threshold, record_dict, not_change_dict, record_length,
quantile_prob, changes_dict, removals_dict,
record_dict, not_change_dict, record_length,
not_change_epochs, quantile_loss):

epoch = 0

threshold_mean_loss = 0
previous_threshold_mean_loss = 0
apply_loss_thres = True
apply_thresholds = True
overlap = 1
previous_overlap = 1
start_rafni = False
previous_prob_threshold = 1
prob_threshold = 1

while epoch <= epochs:
areas = []

while epoch < epochs:
print('Start of epoch %d' % (epoch,))
losses_epoch = []
prob_bad_epoch = []

# Iterate over the batches of the dataset
for step, (fn_batch_train, x_batch_train, y_batch_train) in enumerate(train_dataset):
@@ -64,7 +79,8 @@ def fit(model, train_dataset, optimizer, epochs, global_batch_size,
# Open a GradientTape to record the operations run during the
# forward pass, which enables auto-differentiation.
with tf.GradientTape() as tape:

# for selfie
# l2_loss = tf.math.add_n([tf.nn.l2_loss(var) for var in model.trainable_variables])
# Run the forward pass of the layer.
# The operations that the layer applies to its inputs are
# going to be recorded on the GradientTape.
@@ -79,11 +95,17 @@ def fit(model, train_dataset, optimizer, epochs, global_batch_size,
# Get the losses of the batch inside the losses_epoch list
losses_epoch.extend(losses)

# Get the probabilities of the misclassified samples
for idx in range(len(logits)):
ny = ny_batch_train.numpy()
if np.argmax(logits[idx]) != np.argmax(ny[idx]):
prob_bad_epoch.append(np.max(logits[idx]))

ls_array = np.array(losses)

# Use the loss of each sample:
# 1. to restore the original class
if epoch >= epoch_threshold:
if start_rafni:
# Check if it is necessary to change the label of any instance
check_high_prob_wrong_label(nfn_batch_train, ny_batch_train,
logits, changes_dict, prob_threshold,
@@ -96,7 +118,6 @@ def fit(model, train_dataset, optimizer, epochs, global_batch_size,
check_record(nfn_batch_train, record_dict, removals_dict,
record_length, changes_dict)
# 2. to filter instances
if epoch >= 1.5*epoch_threshold:
for idx in range(len(losses)):
if (losses[idx] > threshold_mean_loss
and nfn_batch_train[idx].numpy() not in not_change_dict):
@@ -129,10 +150,20 @@ def fit(model, train_dataset, optimizer, epochs, global_batch_size,

losses_epoch = np.array(losses_epoch)

# Set the noisy losses' mean, std and threshold
if apply_loss_thres:
# Modify loss and probability thresholds
if apply_thresholds:

# Probability threshold (if there are misclassified samples)
if len(prob_bad_epoch) != 0:
prob_bad_epoch = np.array(prob_bad_epoch)
previous_prob_threshold = prob_threshold
prob_threshold = np.quantile(prob_bad_epoch, quantile_prob)
print('Prob threshold')
print(prob_threshold)

# Loss threshold
previous_threshold_mean_loss = threshold_mean_loss
gm = GaussianMixture(n_components=2, warm_start = True).fit(losses_epoch.reshape(-1,1))
gm = GaussianMixture(n_components=2, warm_start = True, tol = 0.1, reg_covar = 0.15).fit(losses_epoch.reshape(-1,1))
noisy_distribution_idx = np.argmax(gm.means_)
normal_distribution_idx = np.argmin(gm.means_)
noisy_losses_mean = gm.means_[noisy_distribution_idx][0]
@@ -142,12 +173,19 @@ def fit(model, train_dataset, optimizer, epochs, global_batch_size,
normal_losses_mean = gm.means_[normal_distribution_idx][0]
normal_losses_std = gm.covariances_[normal_distribution_idx][0][0]

if (epoch >= 1.5*epoch_threshold and ((noisy_losses_mean - normal_losses_mean <= 0.3)
or (noisy_losses_mean - 4 * noisy_losses_std > threshold_mean_loss)
or (previous_threshold_mean_loss - threshold_mean_loss) > 0.25)):
apply_loss_thres = False
previous_overlap = overlap
overlap = NormalDist(mu = normal_losses_mean, sigma = normal_losses_std).overlap(NormalDist(mu = noisy_losses_mean, sigma = noisy_losses_std))
areas.append(overlap)
if not start_rafni and (overlap < 0.15 or (epoch != 0 and previous_overlap < overlap)):
start_rafni = True
print("Epoch threshold: " + str(epoch+1))

if (start_rafni and ((noisy_losses_mean - normal_losses_mean <= 0.3))):
apply_thresholds = False
threshold_mean_loss = previous_threshold_mean_loss
prob_threshold = previous_prob_threshold

epoch = epoch + 1

print(areas)
return model, changes_dict, removals_dict
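
The core change in this second version of RAFNI is visible above: the fixed epoch_threshold is gone, and the algorithm instead starts (start_rafni) once the overlap between the two Gaussians fitted to the per-sample losses falls below 0.15 or stops shrinking, while the probability threshold for relabelling becomes a per-epoch quantile of the probabilities of the misclassified samples. Below is a minimal sketch of those computations in isolation; the helper name is hypothetical, and the use of quantile_loss through scipy.stats.norm.ppf is an assumption, since the exact loss-threshold formula sits outside the visible hunks.

import numpy as np
import scipy.stats as stats
from sklearn.mixture import GaussianMixture
from statistics import NormalDist

def rafni_thresholds(losses_epoch, prob_bad_epoch, quantile_prob, quantile_loss):
    # Probability threshold: a quantile of the softmax probabilities that the
    # network assigns to the samples it currently misclassifies.
    prob_threshold = np.quantile(np.asarray(prob_bad_epoch), quantile_prob)

    # Two-component Gaussian mixture over the losses; the component with the
    # larger mean is taken to model the noisy instances.
    gm = GaussianMixture(n_components = 2, tol = 0.1, reg_covar = 0.15).fit(
        np.asarray(losses_epoch).reshape(-1, 1))
    noisy_idx, normal_idx = np.argmax(gm.means_), np.argmin(gm.means_)
    noisy_mean = gm.means_[noisy_idx][0]
    normal_mean = gm.means_[normal_idx][0]
    # Sigmas as square roots of the fitted variances (an assumption here; the
    # diff stores the raw covariances under the *_std names).
    noisy_std = np.sqrt(gm.covariances_[noisy_idx][0][0])
    normal_std = np.sqrt(gm.covariances_[normal_idx][0][0])

    # Assumed loss threshold: the quantile_loss quantile of the clean component.
    threshold_mean_loss = stats.norm.ppf(quantile_loss, loc = normal_mean,
                                         scale = normal_std)

    # Overlapping area of the two fitted normals; fit() starts RAFNI once this
    # drops below 0.15 or stops decreasing between epochs.
    overlap = NormalDist(mu = normal_mean, sigma = normal_std).overlap(
        NormalDist(mu = noisy_mean, sigma = noisy_std))
    return prob_threshold, threshold_mean_loss, overlap

fit() recomputes both thresholds every epoch while apply_thresholds holds, and freezes them at their previous values once the noisy and clean loss means come within 0.3 of each other.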
19 changes: 14 additions & 5 deletions load_data.py
@@ -27,6 +27,8 @@ def get_filename(file_path):
def decode_img(img):
# Convert the compressed string to a 3D uint8 tensor
img = tf.image.decode_jpeg(img, channels = 3)
# Use 'convert_image_dtype' to convert to floats in the [0,1] range
# img = tf.image.convert_image_dtype(img, tf.float32)
# Resize the image to the desired size
return tf.image.resize(img, [224, 224])

@@ -42,7 +44,10 @@ def prepare_for_training(ds, batch_size, cache = True, shuffle_buffer_size = 500

ds = ds.shuffle(buffer_size = shuffle_buffer_size)

ds = ds.batch(batch_size)
# Repeat forever
# ds = ds.repeat()

ds = ds.batch(batch_size) #, drop_remainder = True)

# Data augmentation
if data_aug:
@@ -133,15 +138,19 @@ def load_train_test_cifar(path_name, batch_size, noise = None, rate = 0,
test_data = np.load(test_data).astype(np.float32)
test_labels = np.load(test_labels)

fn_train = []
[fn_train.append(str(train_labels[i]) + '_' + str(i)) for i in range(len(train_labels))]
fn_train = np.array(fn_train)
fn_test = []
[fn_test.append(str(test_labels[i]) + '_' + str(i)) for i in range(len(test_labels))]
fn_test = np.array(fn_test)

train_labels = tf.keras.utils.to_categorical(train_labels)
test_labels = tf.keras.utils.to_categorical(test_labels)

fn_train = np.arange(len(train_data))
fn_test = np.arange(len(test_data))

train_ds = tf.data.Dataset.from_tensor_slices((fn_train, train_data, train_labels))
test_ds = tf.data.Dataset.from_tensor_slices((fn_test, test_data, test_labels))

train_ds = prepare_for_training(train_ds, batch_size, cache = cache,
shuffle_buffer_size = 48000, data_aug = data_aug)
shuffle_buffer_size = 50000, data_aug = data_aug)
return train_ds, test_ds
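
A hypothetical usage sketch for the loader above (not part of this commit; the path and noise arguments are placeholders). With this commit the instance identifiers fn_train/fn_test become plain integer indices rather than label-derived strings:

from load_data import load_train_test_cifar

train_ds, test_ds = load_train_test_cifar('data/cifar10', batch_size = 128,
                                          noise = 'symmetric', rate = 0.2)
# Each training batch is the (id, image, label) triple consumed by fit().
for fn_batch, x_batch, y_batch in train_ds.take(1):
    print(fn_batch.shape, x_batch.shape, y_batch.shape)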