diff --git a/README.adoc b/README.adoc
index 57a03904..7ab00f44 100644
--- a/README.adoc
+++ b/README.adoc
@@ -742,6 +742,10 @@ python3 analyze.py --classifier checkpoints/custom/Custom_Classifier.tflite
 +
 NOTE: Setting a custom classifier will also set the new labels file.
 Due to these custom labels, the location filter and locale will be disabled.
++
+. You can include negative samples for classes by prefixing the folder names with a '-' (e.g., `-Poecile atricapillus_Black-capped Chickadee`). Do this with samples that definitely do not contain the species. Negative samples will only be used for training and not for validation. Also keep in mind that negative samples will only be used when a corresponding folder with positive samples exists.
++
+. To train with multi-label data, separate the class labels with commas in the folder names (e.g., `Poecile atricapillus_Black-capped Chickadee,Cardinalis cardinalis_Northern Cardinal`). Do not put spaces around the commas, because the folder name is split on ',' exactly and any surrounding whitespace becomes part of the label. This can also be combined with negative samples as described above. The validation split will be performed per combination of classes, so you might want to ensure sufficient data for each combination of classes. When using multi-label data, the upsampling mode will be limited to 'repeat'.
 
 == Funding
 
diff --git a/config.py b/config.py
index c963a4d6..c911b3f6 100644
--- a/config.py
+++ b/config.py
@@ -166,9 +166,14 @@
 # Mutliple executions will be averaged, so the evaluation is more consistent
 AUTOTUNE_EXECUTIONS_PER_TRIAL: int = 1
 
-# If a binary classification model is trained, this value will be detected automatically in the training script
+# Whether a binary classification model is trained.
+# This value is detected automatically in the training script when only one class and a non-event class are used.
 BINARY_CLASSIFICATION: bool = False
 
+# Whether a model for a multi-label setting is trained.
+# This value is set automatically if subfolders in the input directory are named with multiple classes separated by commas.
+MULTI_LABEL: bool = False
+
 #####################
 # Misc runtime vars #
 #####################
diff --git a/gui.py b/gui.py
index 0a243b2a..abb8d95a 100644
--- a/gui.py
+++ b/gui.py
@@ -396,7 +396,14 @@ def select_subdirectories():
     if dir_name:
         subdirs = utils.list_subdirectories(dir_name[0])
 
-        return dir_name[0], [[d] for d in subdirs]
+        labels = []
+        for folder in subdirs:
+            labels_in_folder = folder.split(',')
+            for label in labels_in_folder:
+                if not label in labels and not label.startswith('-') and not label in cfg.NON_EVENT_CLASSES:
+                    labels.append(label)
+
+        return dir_name[0], [[label] for label in sorted(labels)]
 
     return None, None
 
diff --git a/model.py b/model.py
index ccb4f35a..d108294c 100644
--- a/model.py
+++ b/model.py
@@ -161,7 +161,6 @@ def buildLinearClassifier(num_labels, input_size, hidden_units=0, dropout=0.0):
 
     return model
 
-
 def trainLinearClassifier(
     classifier,
     x_train,
@@ -214,7 +213,11 @@ def on_epoch_end(self, epoch, logs=None):
     y_train = y_train[idx]
 
     # Random val split
-    x_train, y_train, x_val, y_val = utils.random_split(x_train, y_train, val_split)
+    if not cfg.MULTI_LABEL:
+        x_train, y_train, x_val, y_val = utils.random_split(x_train, y_train, val_split)
+    else:
+        x_train, y_train, x_val, y_val = utils.random_multilabel_split(x_train, y_train, val_split)
+
     print(
         f"Training on {x_train.shape[0]} samples, validating on {x_val.shape[0]} samples.",
         flush=True,
@@ -252,7 +255,10 @@ def on_epoch_end(self, epoch, logs=None):
     classifier.compile(
         optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
         loss=custom_loss,
-        metrics=[keras.metrics.AUC(curve="PR", multi_label=False, name="AUPRC"), keras.metrics.AUC(curve="ROC", multi_label=False, name="AUROC")],
+        metrics=[
+            keras.metrics.AUC(curve="PR", multi_label=cfg.MULTI_LABEL, name="AUPRC", num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None, from_logits=True),
+            keras.metrics.AUC(curve="ROC", multi_label=cfg.MULTI_LABEL, name="AUROC", num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None, from_logits=True)
+        ]
     )
 
     # Train model
@@ -262,7 +268,7 @@ def on_epoch_end(self, epoch, logs=None):
         epochs=epochs,
         batch_size=batch_size,
         validation_data=(x_val, y_val),
-        callbacks=callbacks,
+        callbacks=callbacks
     )
 
     return classifier, history
diff --git a/train.py b/train.py
index f400c1e9..3755bb6d 100644
--- a/train.py
+++ b/train.py
@@ -32,48 +32,78 @@ def _loadTrainingData(cache_mode="none", cache_file=""):
     if cache_mode == "load":
         if os.path.isfile(cache_file):
             print(f"\t...loading from cache: {cache_file}", flush=True)
-            x_train, y_train, labels = utils.loadFromCache(cache_file)
+            x_train, y_train, labels, cfg.BINARY_CLASSIFICATION, cfg.MULTI_LABEL = utils.loadFromCache(cache_file)
             return x_train, y_train, labels
         else:
             print(f"\t...cache file not found: {cache_file}", flush=True)
 
     # Get list of subfolders as labels
-    labels = list(sorted(utils.list_subdirectories(cfg.TRAIN_DATA_PATH)))
+    folders = list(sorted(utils.list_subdirectories(cfg.TRAIN_DATA_PATH)))
+
+
+    # Read all individual labels from the folder names
+    labels = []
+
+    for folder in folders:
+        labels_in_folder = folder.split(',')
+        for label in labels_in_folder:
+            if not label in labels:
+                labels.append(label)
+
+    # Sort labels
+    labels = list(sorted(labels))
 
     # Get valid labels
     valid_labels = [l for l in labels if not l.lower() in cfg.NON_EVENT_CLASSES and not l.startswith("-")] 
 
+    # Check if binary classification
     cfg.BINARY_CLASSIFICATION = len(valid_labels) == 1
 
+    # Validate the classes for binary classification
     if cfg.BINARY_CLASSIFICATION:
-        if len([l for l in labels if l.startswith("-")]) > 0:
-            raise Exception("negative labels cant be used with binary classification")
-        if len([l for l in labels if l in cfg.NON_EVENT_CLASSES]) == 0:
-            raise Exception("non-event samples are required for binary classification")
+        if len([l for l in folders if l.startswith("-")]) > 0:
+            raise Exception("Negative labels cant be used with binary classification")
+        if len([l for l in folders if l in cfg.NON_EVENT_CLASSES]) == 0:
+            raise Exception("Non-event samples are required for binary classification")
+
+    # Check if multi label
+    cfg.MULTI_LABEL = len(valid_labels) > 1 and any(',' in f for f in folders)
+
+    # Check if multi-label and binary classification
+    if cfg.BINARY_CLASSIFICATION and cfg.MULTI_LABEL:
+        raise Exception("Error: Binary classfication and multi-label not possible at the same time")
+
+    # Only allow repeat upsampling for multi-label setting
+    if cfg.MULTI_LABEL and cfg.UPSAMPLING_RATIO > 0 and cfg.UPSAMPLING_MODE != 'repeat':
+        raise Exception("Only repeat-upsampling ist available for multi-label")
 
     # Load training data
     x_train = []
     y_train = []
 
-    for label in labels:
+    for folder in folders:
 
-        # Current label
-        print(f"\t- {label}", flush=True)
+        # Current folder
+        print(f"\t- {folder}", flush=True)
 
         # Get label vector
         label_vector = np.zeros((len(valid_labels),), dtype="float32")
-        if not label.lower() in cfg.NON_EVENT_CLASSES and not label.startswith("-"):
-            label_vector[valid_labels.index(label)] = 1
-        elif label.startswith("-") and label[1:] in valid_labels: # Negative labels need to be contained in the valid labels
-            label_vector[valid_labels.index(label[1:])] = -1
+
+        folder_labels = folder.split(',')
+
+        for label in folder_labels:
+            if not label.lower() in cfg.NON_EVENT_CLASSES and not label.startswith("-"):
+                label_vector[valid_labels.index(label)] = 1
+            elif label.startswith("-") and label[1:] in valid_labels: # Negative labels need to be contained in the valid labels
+                label_vector[valid_labels.index(label[1:])] = -1
 
         # Get list of files
         # Filter files that start with '.' because macOS seems to them for temp files.
         files = filter(
             os.path.isfile,
             (
-                os.path.join(cfg.TRAIN_DATA_PATH, label, f)
-                for f in sorted(os.listdir(os.path.join(cfg.TRAIN_DATA_PATH, label)))
+                os.path.join(cfg.TRAIN_DATA_PATH, folder, f)
+                for f in sorted(os.listdir(os.path.join(cfg.TRAIN_DATA_PATH, folder)))
                 if not f.startswith(".") and f.rsplit(".", 1)[-1].lower() in cfg.ALLOWED_FILETYPES
             ),
         )
@@ -87,7 +117,7 @@ def _loadTrainingData(cache_mode="none", cache_file=""):
             
             # if anything happens print the error and ignore the file
             except Exception as e:
-                # Current label
+                # Print Error
                 print(f"\t Error when loading file {f}", flush=True)
                 continue
                 
@@ -161,6 +191,11 @@ def run_trial(self, trial, *args, **kwargs):
                                                         dropout=hp.Choice("dropout", [0.0, 0.25, 0.33, 0.5, 0.75, 0.9], default=cfg.TRAIN_DROPOUT))
                 print("...Done.", flush=True)
 
+                # Only allow repeat upsampling in multi-label setting
+                upsampling_choices = ['repeat', 'mean', 'linear'] #SMOTE is too slow
+                if cfg.MULTI_LABEL:
+                    upsampling_choices = ['repeat']
+
                 # Train model
                 print("Training model...", flush=True)
                 classifier, history = model.trainLinearClassifier(
@@ -172,7 +207,7 @@ def run_trial(self, trial, *args, **kwargs):
                     learning_rate=hp.Choice("learning_rate", [0.1, 0.01, 0.005, 0.002, 0.001, 0.0005, 0.0002, 0.0001], default=cfg.TRAIN_LEARNING_RATE),
                     val_split=cfg.TRAIN_VAL_SPLIT,
                     upsampling_ratio=hp.Choice("upsampling_ratio",[0.0, 0.25, 0.33, 0.5, 0.75, 1.0], default=cfg.UPSAMPLING_RATIO),
-                    upsampling_mode=hp.Choice("upsampling_mode", ['repeat', 'mean', 'linear'], default=cfg.UPSAMPLING_MODE), #SMOTE is too slow
+                    upsampling_mode=hp.Choice("upsampling_mode", upsampling_choices, default=cfg.UPSAMPLING_MODE), 
                     train_with_mixup=hp.Boolean("mixup", default=cfg.TRAIN_WITH_MIXUP),
                     train_with_label_smoothing=hp.Boolean("label_smoothing", default=cfg.TRAIN_WITH_LABEL_SMOOTHING),
                 )
diff --git a/utils.py b/utils.py
index 8eb4e289..7ed5c229 100644
--- a/utils.py
+++ b/utils.py
@@ -56,6 +56,73 @@ def list_subdirectories(path: str):
     """
     return filter(lambda el: os.path.isdir(os.path.join(path, el)), os.listdir(path))
 
+def random_multilabel_split(x, y, val_ratio=0.2):
+    """Splits multi-label data into training and validation sets.
+
+    Each distinct label combination (row of y) is split separately; combinations containing -1 are used for training only.
+
+    Args:
+        x: Samples, indexed along the first axis in step with y.
+        y: 2D label array with one row per sample; a -1 entry marks a negative sample.
+        val_ratio: Fraction of each label combination held out for validation.
+
+    Returns:
+        A tuple of (x_train, y_train, x_val, y_val), each shuffled.
+    
+    """
+
+    # Seed numpy's global RNG with cfg.RANDOM_SEED before any shuffling
+    np.random.seed(cfg.RANDOM_SEED)
+
+    # Distinct label rows, i.e. every label combination that occurs in y
+    class_combinations = np.unique(y, axis=0)
+    
+    # Per-combination pieces are collected in lists and concatenated at the end
+    x_train, y_train, x_val, y_val = [], [], [], []
+
+    # Split the data for each combination of labels
+    for class_combination in class_combinations:
+        # Indices of all samples whose label row equals this combination
+        indices = np.where((y == class_combination).all(axis=1))[0]
+
+        # Combinations containing a negative label (-1) are used for training only
+        if -1 in class_combination:
+            x_train.append(x[indices])
+            y_train.append(y[indices])
+        # Otherwise split according to the validation split
+        else:
+            # Keep at least one sample for training; the remainder (possibly none) goes to validation
+            num_samples = len(indices)
+            num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
+            num_samples_val = max(0, num_samples - num_samples_train)
+            # Shuffle in place, then take the first part for training and the rest for validation
+            np.random.shuffle(indices)
+            train_indices = indices[:num_samples_train]
+            val_indices = indices[num_samples_train:num_samples_train + num_samples_val]
+            # Append samples to training and validation data
+            x_train.append(x[train_indices])
+            y_train.append(y[train_indices])
+            x_val.append(x[val_indices])
+            y_val.append(y[val_indices])
+     
+    # Concatenate the per-combination pieces (NOTE(review): np.concatenate raises on an empty list, e.g. if every combination contains -1 - confirm this cannot happen)
+    x_train = np.concatenate(x_train)
+    y_train = np.concatenate(y_train)
+    x_val = np.concatenate(x_val)
+    y_val = np.concatenate(y_val)
+
+    # Shuffle each set with one shared permutation so samples and labels stay aligned
+    indices = np.arange(len(x_train))
+    np.random.shuffle(indices)
+    x_train = x_train[indices]
+    y_train = y_train[indices]
+
+    indices = np.arange(len(x_val))
+    np.random.shuffle(indices)
+    x_val = x_val[indices]
+    y_val = y_val[indices]
+
+    return x_train, y_train, x_val, y_val       
 
 def random_split(x, y, val_ratio=0.2):
     """Splits the data into training and validation data.
@@ -331,7 +398,7 @@ def saveToCache(cache_file: str, x_train: np.ndarray, y_train: np.ndarray, label
     os.makedirs(os.path.dirname(cache_file), exist_ok=True)
 
     # Save to cache
-    np.savez_compressed(cache_file, x_train=x_train, y_train=y_train, labels=labels)
+    np.savez_compressed(cache_file, x_train=x_train, y_train=y_train, labels=labels, binary_classification=cfg.BINARY_CLASSIFICATION, multi_label=cfg.MULTI_LABEL)
 
 
 def loadFromCache(cache_file: str):
@@ -351,8 +418,10 @@ def loadFromCache(cache_file: str):
     x_train = cache["x_train"]
     y_train = cache["y_train"]
     labels = cache["labels"]
+    binary_classification = bool(cache["binary_classification"]) if "binary_classification" in cache.keys() else False
+    multi_label = bool(cache["multi_label"]) if "multi_label" in cache.keys() else False
 
-    return x_train, y_train, labels
+    return x_train, y_train, labels, binary_classification, multi_label
 
 
 def clearErrorLog():