diff --git a/env-dev.yml b/env-dev.yml
index bf5d19a..a0bfd98 100644
--- a/env-dev.yml
+++ b/env-dev.yml
@@ -18,7 +18,6 @@ dependencies:
   - sphinx
   - black
   - flake8
-  - pytest
   # pip
   - pip
   - pip:
diff --git a/hydromodel/datasets/data_preprocess.py b/hydromodel/datasets/data_preprocess.py
index 0560de4..266efb5 100644
--- a/hydromodel/datasets/data_preprocess.py
+++ b/hydromodel/datasets/data_preprocess.py
@@ -1,7 +1,7 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-10-25 21:16:22
-LastEditTime: 2024-03-25 19:54:15
+LastEditTime: 2024-03-26 19:18:29
 LastEditors: Wenyu Ouyang
 Description: preprocess data for models in hydro-model-xaj
 FilePath: \hydro-model-xaj\hydromodel\datasets\data_preprocess.py
@@ -10,15 +10,15 @@
 
 import os
 import re
+from hydrodataset import Camels
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import KFold
-from collections import OrderedDict
 import xarray as xr
 
-from hydroutils import hydro_time, hydro_file
+from hydrodata.utils.utils import streamflow_unit_conv
 
-from hydromodel import CACHE_DIR
+from hydromodel import CACHE_DIR, SETTING
 from hydromodel.datasets import *
 
 
@@ -293,7 +293,8 @@ def split_train_test(ts_data, train_period, test_period):
 
     Returns
     -------
-    None
+    tuple of xr.Dataset
+        A tuple of xr.Dataset for training and testing data
     """
     # Convert date strings to pandas datetime objects
     train_start, train_end = pd.to_datetime(train_period[0]), pd.to_datetime(
@@ -387,3 +388,93 @@ def cross_valid_data(ts_data, period, warmup, cv_fold, freq="1D"):
         train_test_data.append((train_data, test_data))
 
     return train_test_data
+
+
+def get_ts_from_diffsource(data_type, data_dir, periods, basin_ids):
+    """Get time series data from different sources and unify the format and unit of streamflow.
+
+    Parameters
+    ----------
+    data_type
+        The type of the data source, 'camels' or 'owndata'
+    data_dir
+        The directory of the data source
+    periods
+        The periods of the time series data
+    basin_ids
+        The ids of the basins
+
+    Returns
+    -------
+    xr.Dataset
+        The time series data
+
+    Raises
+    ------
+    NotImplementedError
+        The data type is not 'camels' or 'owndata'
+    """
+    prcp_name = remove_unit_from_name(PRCP_NAME)
+    pet_name = remove_unit_from_name(PET_NAME)
+    flow_name = remove_unit_from_name(FLOW_NAME)
+    area_name = remove_unit_from_name(AREA_NAME)
+    if data_type == "camels":
+        camels_data_dir = os.path.join(
+            SETTING["local_data_path"]["datasets-origin"], "camels", data_dir
+        )
+        camels = Camels(camels_data_dir)
+        ts_data = camels.read_ts_xrdataset(
+            basin_ids, periods, ["prcp", "PET", "streamflow"]
+        )
+        basin_area = camels.read_area(basin_ids)
+        # trans unit to mm/day
+        qobs_ = ts_data[["streamflow"]]
+        target_unit = ts_data["prcp"].attrs.get("units", "unknown")
+        r_mmd = streamflow_unit_conv(qobs_, basin_area, target_unit=target_unit)
+        ts_data[flow_name] = r_mmd["streamflow"]
+        ts_data[flow_name].attrs["units"] = target_unit
+        ts_data = ts_data.rename({"PET": pet_name})
+        # ts_data = ts_data.drop_vars('streamflow')
+    elif data_type == "owndata":
+        ts_data = xr.open_dataset(
+            os.path.join(os.path.dirname(data_dir), "timeseries.nc")
+        )
+        attr_data = xr.open_dataset(
+            os.path.join(os.path.dirname(data_dir), "attributes.nc")
+        )
+        basin_area = attr_data[area_name].values
+        target_unit = ts_data[prcp_name].attrs.get("units", "unknown")
+        qobs_ = ts_data[[flow_name]]
+        r_mmd = streamflow_unit_conv(qobs_, basin_area, target_unit=target_unit)
+        ts_data[flow_name] = r_mmd[flow_name]
+        ts_data[flow_name].attrs["units"] = target_unit
+    else:
+        raise NotImplementedError(
+            "You should set the data type as 'camels' or 'owndata'"
+        )
+
+    return ts_data
+
+
+def get_pe_q_from_ts(ts_xr_dataset):
+    """Transform the time series data to the format that can be used in the calibration process
+
+    Parameters
+    ----------
+    ts_xr_dataset : xr.Dataset
+        The time series data
+
+    Returns
+    -------
+    tuple[np.ndarray, np.ndarray]
+        The tuple contains the precipitation and evaporation data and the observed streamflow data
+    """
+    prcp_name = remove_unit_from_name(PRCP_NAME)
+    pet_name = remove_unit_from_name(PET_NAME)
+    flow_name = remove_unit_from_name(FLOW_NAME)
+    p_and_e = (
+        ts_xr_dataset[[prcp_name, pet_name]].to_array().to_numpy().transpose(2, 1, 0)
+    )
+    qobs = np.expand_dims(ts_xr_dataset[flow_name].to_numpy().transpose(1, 0), axis=2)
+
+    return p_and_e, qobs
diff --git a/hydromodel/trainers/train_utils.py b/hydromodel/trainers/train_utils.py
index 9dd6341..b6a42db 100644
--- a/hydromodel/trainers/train_utils.py
+++ b/hydromodel/trainers/train_utils.py
@@ -1,7 +1,7 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-10-25 21:16:22
-LastEditTime: 2024-03-22 20:07:08
+LastEditTime: 2024-03-26 18:20:57
 LastEditors: Wenyu Ouyang
 Description: Plots for calibration and testing results
 FilePath: \hydro-model-xaj\hydromodel\trainers\train_utils.py
@@ -64,7 +64,6 @@ def plot_train_iteration(likelihood, save_fig):
 
 
 def show_calibrate_result(
-    spot_setup,
     sceua_calibrated_file,
     warmup_length,
     save_dir,
@@ -79,8 +78,6 @@ def show_calibrate_result(
 
     Parameters
     ----------
-    spot_setup
-        Spotpy's setup class instance
     sceua_calibrated_file
         the result file saved after optimizing
     basin_id
diff --git a/scripts/calibrate_xaj.py b/scripts/calibrate_xaj.py
index 1e4d462..1367c4d 100644
--- a/scripts/calibrate_xaj.py
+++ b/scripts/calibrate_xaj.py
@@ -1,7 +1,7 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-11-19 17:27:05
-LastEditTime: 2024-03-26 16:54:05
+LastEditTime: 2024-03-26 18:55:25
 LastEditors: Wenyu Ouyang
 Description: the script to calibrate a model for CAMELS basin
 FilePath: \hydro-model-xaj\scripts\calibrate_xaj.py
@@ -14,18 +14,18 @@
 import sys
 import os
 from pathlib import Path
-import xarray as xr
 import yaml
 
-from hydrodataset import Camels
-from hydrodata.utils.utils import streamflow_unit_conv
-
 
 repo_path = os.path.dirname(Path(os.path.abspath(__file__)).parent)
 sys.path.append(repo_path)
-from hydromodel import SETTING
 from hydromodel.datasets import *
-from hydromodel.datasets.data_preprocess import cross_valid_data, split_train_test
+from hydromodel.datasets.data_preprocess import (
+    cross_valid_data,
+    split_train_test,
+    get_ts_from_diffsource,
+    get_pe_q_from_ts,
+)
 from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua
 
 
@@ -42,22 +42,7 @@ def calibrate(args):
     model_info = args.model
     algo_info = args.algorithm
     loss = args.loss
-    if data_type == "camels":
-        camels_data_dir = os.path.join(
-            SETTING["local_data_path"]["datasets-origin"], "camels", data_dir
-        )
-        camels = Camels(camels_data_dir)
-        ts_data = camels.read_ts_xrdataset(
-            basin_ids, periods, ["prcp", "PET", "streamflow"]
-        )
-    elif data_type == "owndata":
-        ts_data = xr.open_dataset(
-            os.path.join(os.path.dirname(data_dir), "timeseries.nc")
-        )
-    else:
-        raise NotImplementedError(
-            "You should set the data type as 'camels' or 'owndata'"
-        )
+    ts_data = get_ts_from_diffsource(data_type, data_dir, periods, basin_ids)
 
     where_save = Path(os.path.join(repo_path, "result", exp))
     if os.path.exists(where_save) is False:
@@ -68,55 +53,39 @@ def calibrate(args):
         periods = np.sort(
             [train_period[0], train_period[1], test_period[0], test_period[1]]
         )
-    if cv_fold > 1:
-        train_and_test_data = cross_valid_data(ts_data, periods, warmup, cv_fold)
-    else:
-        # when using train_test_split, the warmup period is not used
-        # so you should include the warmup period in the train and test period
         train_and_test_data = split_train_test(ts_data, train_period, test_period)
+    else:
+        # cross validation
+        train_and_test_data = cross_valid_data(ts_data, periods, warmup, cv_fold)
+
     print("Start to calibrate the model")
 
-    if data_type == "camels":
-        basin_area = camels.read_area(basin_ids)
-        p_and_e = (
-            train_and_test_data[0][["prcp", "PET"]]
-            .to_array()
-            .to_numpy()
-            .transpose(2, 1, 0)
-        )
-        # trans unit to mm/day
-        qobs_ = train_and_test_data[0][["streamflow"]]
-        r_mmd = streamflow_unit_conv(qobs_, basin_area, target_unit="mm/d")
-        qobs = np.expand_dims(r_mmd["streamflow"].to_numpy().transpose(1, 0), axis=2)
-    elif data_type == "owndata":
-        attr_data = xr.open_dataset(
-            os.path.join(os.path.dirname(data_dir), "attributes.nc")
-        )
-        basin_area = attr_data["area"].values
-        p_and_e = (
-            train_and_test_data[0][[PRCP_NAME, PET_NAME]]
-            .to_array()
-            .to_numpy()
-            .transpose(2, 1, 0)
-        )
-        qobs = np.expand_dims(
-            train_and_test_data[0][[FLOW_NAME]].to_array().to_numpy().transpose(1, 0),
-            axis=2,
+    if cv_fold <= 1:
+        p_and_e, qobs = get_pe_q_from_ts(train_and_test_data[0])
+        calibrate_by_sceua(
+            basin_ids,
+            p_and_e,
+            qobs,
+            os.path.join(where_save, "sceua_xaj"),
+            warmup,
+            model=model_info,
+            algorithm=algo_info,
+            loss=loss,
         )
     else:
-        raise NotImplementedError(
-            "You should set the data type as 'camels' or 'owndata'"
-        )
-    calibrate_by_sceua(
-        basin_ids,
-        p_and_e,
-        qobs,
-        os.path.join(where_save, "sceua_xaj"),
-        warmup,
-        model=model_info,
-        algorithm=algo_info,
-        loss=loss,
-    )
+        for i in range(cv_fold):
+            train_data, _ = train_and_test_data[i]
+            p_and_e_cv, qobs_cv = get_pe_q_from_ts(train_data)
+            calibrate_by_sceua(
+                basin_ids,
+                p_and_e_cv,
+                qobs_cv,
+                os.path.join(where_save, f"sceua_xaj_cv{i+1}"),
+                warmup,
+                model=model_info,
+                algorithm=algo_info,
+                loss=loss,
+            )
     # Convert the arguments to a dictionary
     args_dict = vars(args)
     # Save the arguments to a YAML file
@@ -155,7 +124,7 @@ def calibrate(args):
         "--cv_fold",
         dest="cv_fold",
         help="the number of cross-validation fold",
-        default=1,
+        default=2,
         type=int,
     )
     parser.add_argument(
diff --git a/scripts/calibrate_xaj_for_multicases.py b/scripts/calibrate_xaj_for_multicases.py
index 64d3021..babb920 100644
--- a/scripts/calibrate_xaj_for_multicases.py
+++ b/scripts/calibrate_xaj_for_multicases.py
@@ -13,7 +13,7 @@
 from pathlib import Path
 
 sys.path.append(os.path.dirname(Path(os.path.abspath(__file__)).parent))
-from scripts.evaluate_xaj import calibrate
+from scripts.evaluate_xaj import evaluate
 
 matplotlib.use("Agg")
 exp = "exp61561"
@@ -49,4 +49,4 @@ def __init__(self, exp, book, source, rep, ngs):
         rep = reps[i]
         ng = ngs[j]
         xaj_calibrate = XAJCalibrateMultiCases(exp, book, source, rep, ng)
-        calibrate(xaj_calibrate)
+        evaluate(xaj_calibrate)
diff --git a/scripts/evaluate_xaj.py b/scripts/evaluate_xaj.py
index 7a44e97..f529e9c 100644
--- a/scripts/evaluate_xaj.py
+++ b/scripts/evaluate_xaj.py
@@ -1,16 +1,17 @@
 import argparse
-import json
 import socket
 import fnmatch
 from datetime import datetime
 import numpy as np
 import pandas as pd
+import yaml
 import os
 import sys
 from pathlib import Path
 from hydroutils import hydro_file
 
-sys.path.append(os.path.dirname(Path(os.path.abspath(__file__)).parent))
+repo_path = os.path.dirname(Path(os.path.abspath(__file__)).parent)
+sys.path.append(repo_path)
 from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua
 from hydromodel.datasets.data_postprocess import (
     renormalize_params,
@@ -24,51 +25,28 @@
 from hydromodel.trainers.calibrate_ga import calibrate_by_ga, show_ga_result
 
 
-def calibrate(args):
+def read_yaml_config(file_path):
+    with open(file_path, "r") as file:
+        config = yaml.safe_load(file)
+    return config
+
+
+def evaluate(args):
     exp = args.exp
     warmup = args.warmup_length
-    data_dir = os.path.join(
-        "D:/研究生/毕业论文/new毕业论文/预答辩/碧流河水库/模型运行/"
-    )
-    kfold = [
-        int(f_name[len("data_info_fold") : -len("_test.json")])
-        for f_name in os.listdir(data_dir)  # 输出文件夹下的所有文件
-        if fnmatch.fnmatch(f_name, "*_fold*_test.json")
-    ]
+    cali_dir = Path(os.path.join(repo_path, "result", exp))
+    cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml"))
     kfold = np.sort(kfold)
     for fold in kfold:
         print(f"Start to calibrate the {fold}-th fold")
-        train_data_info_file = os.path.join(
-            data_dir, f"data_info_fold{str(fold)}_train.json"
-        )
-        train_data_file = os.path.join(data_dir, f"data_info_fold{str(fold)}_train.npy")
-        test_data_info_file = os.path.join(
-            data_dir, f"data_info_fold{str(fold)}_test.json"
-        )
-        test_data_file = os.path.join(data_dir, f"data_info_fold{str(fold)}_test.npy")
-        if (
-            os.path.exists(train_data_info_file) is False
-            or os.path.exists(train_data_file) is False
-            or os.path.exists(test_data_info_file) is False
-            or os.path.exists(test_data_file) is False
-        ):
-            raise FileNotFoundError(
-                "The data files are not found, please run datapreprocess4calibrate.py first."
-            )
-        data_train = hydro_file.unserialize_numpy(train_data_file)
-        data_test = hydro_file.unserialize_numpy(test_data_file)
-        data_info_train = hydro_file.unserialize_json_ordered(train_data_info_file)
-        data_info_test = hydro_file.unserialize_json_ordered(test_data_info_file)
         current_time = datetime.now().strftime("%b%d_%H-%M-%S")
         save_dir = os.path.join(
-            data_dir,
+            cali_dir,
             current_time
             + "_"
             + socket.gethostname()
             + "_fold"
             + str(fold)
-            + "_"
-            + comment,
         )
         # 读输入文件
         if os.path.exists(save_dir) is False:
@@ -86,16 +64,6 @@ def calibrate(args):
                 if not os.path.exists(spotpy_db_dir):
                     os.makedirs(spotpy_db_dir)
                 db_name = os.path.join(spotpy_db_dir, "SCEUA_" + model_info["name"])
-
-                sampler = calibrate_by_sceua(  # 率定
-                    data_train[:, i : i + 1, 0:2],
-                    data_train[:, i : i + 1, -1:],
-                    db_name,
-                    warmup_length=warmup,
-                    model=model_info,
-                    algorithm=algo_info,
-                )
-
                 show_calibrate_result(  # 展示率定结果
                     sampler.setup,
                     db_name,
@@ -223,4 +191,4 @@ def calibrate(args):
         type=int,
     )
     the_args = parser.parse_args()
-    calibrate(the_args)
+    evaluate(the_args)