Split learning LLM example. #366

Merged
merged 27 commits on Nov 10, 2023

Commits (27)
620b7b9
added LLM split learning using current APIs.
dixiyao Nov 7, 2023
dadd62f
renamed names of functions and configuration variables.
dixiyao Nov 7, 2023
4988d0b
cleaned up the trainer.
dixiyao Nov 7, 2023
90158ae
Added two more API functions in split learning trainer.
dixiyao Nov 7, 2023
af2b55b
fixed a bug related to config.
dixiyao Nov 7, 2023
4bb06d5
added self.training args. and fixed a bug in copy_weight.
dixiyao Nov 7, 2023
4a9dc40
fixed a bug related to gradients.
dixiyao Nov 7, 2023
6f5704b
moved loading datasource to the init in the server split learning.
dixiyao Nov 7, 2023
91461ba
changed name of configuration file.
dixiyao Nov 7, 2023
fe7c91c
updated the docs/examples.md about LLM split learning.
dixiyao Nov 7, 2023
9a7fd67
added support for other LLMs in Huggingface.
dixiyao Nov 7, 2023
14c81fd
resolved issues caused by "/" in model names.
dixiyao Nov 7, 2023
37ab27a
revised the name matching to require an exact match.
dixiyao Nov 8, 2023
c4a586d
added support for Llama2 by passing use_auth_token.
dixiyao Nov 8, 2023
9755c99
added llama2 example.
dixiyao Nov 8, 2023
da8e79b
added support for LoRA model.
dixiyao Nov 8, 2023
72d9202
fixed a bug in loading lora model.
dixiyao Nov 8, 2023
fb7f49e
extract and load LoRA weights only.
dixiyao Nov 8, 2023
9ef9253
updated the examples.md about using LoRA to finetune.
dixiyao Nov 8, 2023
ed988d2
revised grammar and spelling in examples.md
dixiyao Nov 8, 2023
913ac63
fixed a bug in calculating the accuracy.
dixiyao Nov 8, 2023
b7821b3
Simplified split learning main function.
HeyHao Nov 9, 2023
d3e79b9
removed useless save_metrics.
dixiyao Nov 9, 2023
b57ebc7
used the checkpoint path as the temporary path for huggingface trainer.
dixiyao Nov 9, 2023
e5092df
use the checkpoint path as the temporary path for the Hugging Face tr…
dixiyao Nov 9, 2023
04d35d5
Check spelling in comments and names
silviafeiwang Nov 10, 2023
5f48c7f
Add libraries needed into requirement.txt
silviafeiwang Nov 10, 2023
13 changes: 13 additions & 0 deletions docs/examples.md
@@ -236,6 +236,19 @@ python examples/split_learning/controlnet_split_learning/split_learning_main.py
```
````

````{admonition} **Split Learning for Training LLM**
This is an example of fine-tuning a Hugging Face large language model with split learning. Two fine-tuning policies are supported: training the whole model and fine-tuning with the LoRA algorithm. The cut layer in the configuration file should be set to an integer, indicating the index of the transformer block at which the model is split.

Fine-tune the whole model
```shell
python ./examples/split_learning/llm_split_learning/split_learning_main.py -c ./examples/split_learning/llm_split_learning/split_learning_wikitext103_gpt2.yml
```
Fine-tune with LoRA
```shell
python ./examples/split_learning/llm_split_learning/split_learning_main.py -c ./examples/split_learning/llm_split_learning/split_learning_wikitext2_gpt2_lora.yml
```
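As an illustration of what the integer cut layer means (a sketch assuming GPT-2, not part of this example's configuration files): GPT-2 keeps its transformer blocks in `transformer.h`, so a cut layer of 3 places blocks 0–2 on the client and blocks 3–11 on the server.
```python
# Illustrative only: how an integer cut layer splits GPT-2's transformer blocks.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2")
cut_layer = 3  # hypothetical value for the cut layer in the configuration file
client_blocks = model.transformer.h[:cut_layer]  # run on the client
server_blocks = model.transformer.h[cut_layer:]  # run on the server
print(len(client_blocks), len(server_blocks))    # 3 9
```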
````

#### Personalized Federated Learning Algorithms

````{admonition} **FedRep**
195 changes: 195 additions & 0 deletions examples/split_learning/llm_split_learning/split_learning_llm_model.py
@@ -0,0 +1,195 @@
"""
Obtain LLM models from Hugging Face, specifically designed for split learning.
"""

import torch
from transformers import AutoModelForCausalLM, AutoConfig
from peft import get_peft_model, LoraConfig
from plato.config import Config


def get_lora_model(model):
"""Apply LoRA optimization over the model"""
lora_config = Config().parameters.lora
model = get_peft_model(model, LoraConfig(**lora_config._asdict()))
model.print_trainable_parameters()
return model
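
# Note (illustrative, not prescriptive): Config().parameters.lora is unpacked
# directly into peft's LoraConfig, so the configuration file carries LoraConfig
# fields; plausible values for a GPT-2-style model would be r=8, lora_alpha=16,
# target_modules=["c_attn"], and lora_dropout=0.05.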


def get_module(start_module: torch.nn.Module, module_names):
"""
    Retrieve a PyTorch submodule by walking from start_module through
    a given list of attribute names.
"""
module = start_module
for module_name in module_names:
module = getattr(module, module_name)
return module
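
# Example (assuming a GPT-2-style model): get_module(model, ["transformer", "h"])
# walks the dotted path "transformer.h" and returns the ModuleList of transformer
# blocks; this is how Config().parameters.model.transformer_module_name is resolved.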


class BaseModel(torch.nn.Module):
"""
    The base model that loads a Hugging Face model; it is used for both the server model and the client model.
"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.model_name = Config().trainer.model_name
use_auth_token = None
if hasattr(Config().parameters, "huggingface_token"):
use_auth_token = Config().parameters.huggingface_token
config_kwargs = {
"cache_dir": None,
"revision": "main",
"use_auth_token": use_auth_token,
}

self.config = AutoConfig.from_pretrained(self.model_name, **config_kwargs)

self.base_model = AutoModelForCausalLM.from_pretrained(
self.model_name,
config=self.config,
cache_dir=Config().params["model_path"] + "/huggingface",
token=use_auth_token,
)
self.cut_layer = Config().parameters.model.cut_layer

def get_input_embeddings(self):
"""
        Return the input embeddings of the base model.
"""
return self.base_model.get_input_embeddings()

def forward(self, inputs):
"""
The forward function for the base model.
"""
return self.base_model(inputs)


class ClientModel(BaseModel):
"""
The model on the clients in split learning with LLM.
"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
        # Keep only the transformer blocks before the cut layer;
        # the remaining blocks and subsequent layers run on the server.
transformer_module = self.base_model
for module_name in Config().parameters.model.transformer_module_name.split("."):
transformer_module = getattr(transformer_module, module_name)
client_layers = transformer_module[: self.cut_layer]
client_module_names = Config().parameters.model.transformer_module_name.split(
"."
)
client_module = get_module(self.base_model, client_module_names[:-1])
setattr(client_module, client_module_names[-1], client_layers)
# Set layers not on the clients to Identity
for layer in Config().parameters.model.layers_after_transformer:
layer = layer.split(".")
if len(layer) > 1:
module = get_module(self.base_model, layer[:-1])
setattr(module, layer[-1], torch.nn.Identity())
else:
setattr(self.base_model, layer[0], torch.nn.Identity())
# Apply LoRA optimization
if hasattr(Config().parameters, "lora"):
self.base_model = get_lora_model(self.base_model)

def forward(self, inputs):
"""
The forward function on the client
"""
inputs = inputs.long()
return self.base_model(input_ids=inputs, return_dict=False)

def forward_to(self, inputs):
"""
Forward to the cut layer and output intermediate feature
"""
outputs = self.forward(inputs)
return outputs[0]
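
# Because the blocks after the cut have been removed and the remaining head layers
# replaced with Identity, forward_to() returns the hidden states at the cut layer
# (shape [batch_size, sequence_length, 768] for GPT-2, used here as an assumed
# example). These intermediate features are sent to the server, which resumes the
# pass in ServerModel.forward_from().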


class ServerModel(BaseModel):
"""
The model used on the cloud
"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
        # In this design, there are two copies of the model.
        # The first copy (base_model) is the whole model, used for testing.
        # The second copy (server_model) only contains the layers on the server
        # and is used for training.
self.server_model = AutoModelForCausalLM.from_pretrained(
self.model_name,
config=self.config,
cache_dir=Config().params["model_path"] + "/huggingface",
)
transformer_module = get_module(
self.base_model,
Config().parameters.model.transformer_module_name.split("."),
)
server_layers = transformer_module[self.cut_layer :]
server_module_names = Config().parameters.model.transformer_module_name.split(
"."
)
server_module = get_module(self.server_model, server_module_names[:-1])
setattr(server_module, server_module_names[-1], server_layers)
# Apply LoRA optimization
if hasattr(Config().parameters, "lora"):
self.base_model = get_lora_model(self.base_model)
self.server_model = get_lora_model(self.server_model)

def copy_weight(self):
"""
        Copy the weights of the training model (server_model) into the full testing model (base_model).
"""
basic_name = Config().parameters.model.transformer_module_name
        # A LoRA model wraps everything in a module named base_model.model
if hasattr(Config().parameters, "lora"):
basic_name = "base_model.model." + basic_name
base_model_weights = self.base_model.state_dict()
server_model_weights = self.server_model.state_dict()

transformer_module = self.base_model
for module_name in basic_name.split("."):
transformer_module = getattr(transformer_module, module_name)
layer_names = [
basic_name + "." + str(index)
for index in range(
self.cut_layer,
len(transformer_module),
)
]
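        # Mapping between the two copies: with cut_layer = k, block k + i of the
        # full testing model holds the weights of block i of the training model,
        # e.g. (GPT-2 naming, k = 3) "transformer.h.3.*" is copied from the server
        # model's "transformer.h.0.*".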
for weight_name in base_model_weights.keys():
# Copy the weights of transformer blocks
for layer_index, layer_name in enumerate(layer_names):
if layer_name in weight_name:
suffix = weight_name[
weight_name.find(layer_name) + len(layer_name) :
]
                    # Require an exact block index match so that, e.g.,
                    # "transformer.h.1" does not also match "transformer.h.10".
                    if suffix[0] != ".":
                        continue
server_weight_name = basic_name + "." + str(layer_index) + suffix
base_model_weights[weight_name] = server_model_weights[
server_weight_name
]
# Copy the weights of layers after transformer blocks
for layer in Config().parameters.model.layers_after_transformer:
layer_name = basic_name + "." + layer
if layer_name in weight_name:
base_model_weights[weight_name] = server_model_weights[weight_name]

self.base_model.load_state_dict(base_model_weights)

def forward_from(self, inputs, labels):
"""
Forward from the intermediate feature on the server.
"""
labels = labels.long()
outputs = self.server_model(inputs_embeds=inputs, labels=labels)
return outputs
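The split forward pass implemented by `ClientModel.forward_to` and `ServerModel.forward_from` above can be reproduced with plain Transformers. The following is a minimal, self-contained sketch assuming GPT-2 and a hypothetical cut layer of 3; it is illustrative and not code from this pull request.
```python
import torch
from transformers import AutoModelForCausalLM

cut_layer = 3  # hypothetical cut point

# Client half: keep the blocks before the cut and disable the layers after them.
client = AutoModelForCausalLM.from_pretrained("gpt2")
client.transformer.h = client.transformer.h[:cut_layer]
client.transformer.ln_f = torch.nn.Identity()
client.lm_head = torch.nn.Identity()

# Server half: keep only the blocks after the cut.
server = AutoModelForCausalLM.from_pretrained("gpt2")
server.transformer.h = server.transformer.h[cut_layer:]

tokens = torch.randint(0, client.config.vocab_size, (1, 16))

# Client forwards to the cut layer and sends the intermediate features to the server.
hidden_states = client(input_ids=tokens, return_dict=False)[0]  # (1, 16, 768)

# Server resumes from the features via inputs_embeds and computes the LM loss.
outputs = server(inputs_embeds=hidden_states, labels=tokens)
print(outputs.loss)
```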
25 changes: 25 additions & 0 deletions examples/split_learning/llm_split_learning/split_learning_lora_algorithm.py
@@ -0,0 +1,25 @@
"""
A split learning algorithm supporting LoRA fine-tuning of LLMs.
"""
from peft import (
set_peft_model_state_dict,
get_peft_model_state_dict,
)
from plato.algorithms import split_learning


class Algorithm(split_learning.Algorithm):
"""
Extract and load only the LoRA weights.
"""

def extract_weights(self, model=None):
        # Extract only the LoRA adapter weights
return {
k: v.cpu()
for k, v in get_peft_model_state_dict(self.model.base_model).items()
}

def load_weights(self, weights):
# Load LoRA weights
return set_peft_model_state_dict(self.model.base_model, weights)
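For reference, `get_peft_model_state_dict` returns only the LoRA adapter tensors, which is why extracting and loading just these weights keeps the exchanged payload small. A minimal sketch, assuming GPT-2 and illustrative LoRA settings (not this example's configuration):
```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, get_peft_model_state_dict

# Illustrative LoRA setup for GPT-2; the values are assumptions, not the PR's config.
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained("gpt2"),
    LoraConfig(r=8, lora_alpha=16, target_modules=["c_attn"], lora_dropout=0.05),
)
adapter_weights = get_peft_model_state_dict(model)  # only the lora_A / lora_B tensors
full_weights = model.state_dict()                   # the entire wrapped model
print(len(adapter_weights), len(full_weights))      # the adapter dict is much smaller
```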
28 changes: 28 additions & 0 deletions examples/split_learning/llm_split_learning/split_learning_main.py
@@ -0,0 +1,28 @@
"""
Use split learning to fine-tune a Hugging Face large language model.
"""
import split_learning_trainer
from split_learning_llm_model import ServerModel, ClientModel
from split_learning_lora_algorithm import Algorithm as LoRAAlgorithm

from plato.servers.split_learning import Server
from plato.clients.split_learning import Client
from plato.config import Config


def main():
"""A Plato federated learning training session using the split learning algorithm."""

algorithm = LoRAAlgorithm if hasattr(Config().parameters, "lora") else None

client = Client(
trainer=split_learning_trainer.Trainer, model=ClientModel, algorithm=algorithm
)
server = Server(
trainer=split_learning_trainer.Trainer, model=ServerModel, algorithm=algorithm
)
server.run(client)


if __name__ == "__main__":
main()