diff --git a/mlpf/model/losses.py b/mlpf/model/losses.py index a0d338f00..2555891bf 100644 --- a/mlpf/model/losses.py +++ b/mlpf/model/losses.py @@ -4,6 +4,8 @@ from torch.nn import functional as F from torch import Tensor, nn +from mlpf.model.logger import _logger + def sliced_wasserstein_loss(y_pred, y_true, num_projections=200): # create normalized random basis vectors @@ -74,9 +76,9 @@ def mlpf_loss(y, ypred, batch): loss_regression_energy[batch.mask == 0] *= 0 # add weight based on target pt - # sqrt_target_pt = torch.sqrt(torch.exp(y["pt"]) * batch.X[:, :, 1]) - # loss_regression_pt *= sqrt_target_pt - # loss_regression_energy *= sqrt_target_pt + sqrt_target_pt = torch.sqrt(torch.exp(y["pt"]) * batch.X[:, :, 1]) + loss_regression_pt *= sqrt_target_pt + loss_regression_energy *= sqrt_target_pt # average over all target particles loss["Regression_pt"] = loss_regression_pt.sum() / npart @@ -122,10 +124,15 @@ def mlpf_loss(y, ypred, batch): + loss["Regression_energy"] ) loss_opt = loss["Total"] + if torch.isnan(loss_opt): + _logger.error(ypred) + _logger.error(sqrt_target_pt) + _logger.error(loss) + raise Exception("Loss became NaN") # store these separately but detached for k in loss.keys(): - loss[k] = loss[k].detach().cpu().item() + loss[k] = loss[k].detach() return loss_opt, loss diff --git a/mlpf/model/plots.py b/mlpf/model/plots.py index 43dc809c3..50f6074b0 100644 --- a/mlpf/model/plots.py +++ b/mlpf/model/plots.py @@ -123,45 +123,51 @@ def validation_plots(batch, ypred_raw, ytarget, ypred, tensorboard_writer, epoch plt.xlabel("particle proba") tensorboard_writer.add_figure("sig_proba_elemtype{}".format(int(xcls)), fig, global_step=epoch) - tensorboard_writer.add_histogram("pt_target", torch.clamp(batch.ytarget[batch.mask][:, 2], -10, 10), global_step=epoch) - tensorboard_writer.add_histogram("pt_pred", torch.clamp(ypred_raw[2][batch.mask][:, 0], -10, 10), global_step=epoch) - ratio = (ypred_raw[2][batch.mask][:, 0] / batch.ytarget[batch.mask][:, 2])[batch.ytarget[batch.mask][:, 0] != 0] - tensorboard_writer.add_histogram("pt_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) - - tensorboard_writer.add_histogram("eta_target", torch.clamp(batch.ytarget[batch.mask][:, 3], -10, 10), global_step=epoch) - tensorboard_writer.add_histogram("eta_pred", torch.clamp(ypred_raw[2][batch.mask][:, 1], -10, 10), global_step=epoch) - ratio = (ypred_raw[2][batch.mask][:, 1] / batch.ytarget[batch.mask][:, 3])[batch.ytarget[batch.mask][:, 0] != 0] - tensorboard_writer.add_histogram("eta_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) - - tensorboard_writer.add_histogram("sphi_target", torch.clamp(batch.ytarget[batch.mask][:, 4], -10, 10), global_step=epoch) - tensorboard_writer.add_histogram("sphi_pred", torch.clamp(ypred_raw[2][batch.mask][:, 2], -10, 10), global_step=epoch) - ratio = (ypred_raw[2][batch.mask][:, 2] / batch.ytarget[batch.mask][:, 4])[batch.ytarget[batch.mask][:, 0] != 0] - tensorboard_writer.add_histogram("sphi_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) - - tensorboard_writer.add_histogram("cphi_target", torch.clamp(batch.ytarget[batch.mask][:, 5], -10, 10), global_step=epoch) - tensorboard_writer.add_histogram("cphi_pred", torch.clamp(ypred_raw[2][batch.mask][:, 3], -10, 10), global_step=epoch) - ratio = (ypred_raw[2][batch.mask][:, 3] / batch.ytarget[batch.mask][:, 5])[batch.ytarget[batch.mask][:, 0] != 0] - tensorboard_writer.add_histogram("cphi_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) - - tensorboard_writer.add_histogram("energy_target", torch.clamp(batch.ytarget[batch.mask][:, 6], -10, 10), global_step=epoch) - tensorboard_writer.add_histogram("energy_pred", torch.clamp(ypred_raw[2][batch.mask][:, 4], -10, 10), global_step=epoch) - ratio = (ypred_raw[2][batch.mask][:, 4] / batch.ytarget[batch.mask][:, 6])[batch.ytarget[batch.mask][:, 0] != 0] - tensorboard_writer.add_histogram("energy_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) - - for attn in sorted(list(glob.glob(f"{outdir}/attn_conv_*.npz"))): - attn_name = os.path.basename(attn).split(".")[0] - attn_matrix = np.load(attn)["att"] - batch_size = min(attn_matrix.shape[0], 8) - fig, axes = plt.subplots(1, batch_size, figsize=((batch_size * 3, 1 * 3))) - if isinstance(axes, matplotlib.axes._axes.Axes): - axes = [axes] - for ibatch in range(batch_size): - plt.sca(axes[ibatch]) - # plot the attention matrix of the first event in the batch - plt.imshow(attn_matrix[ibatch].T, cmap="hot", norm=matplotlib.colors.LogNorm()) - plt.xticks([]) - plt.yticks([]) - plt.colorbar() - plt.title("event {}, m={:.2E}".format(ibatch, np.mean(attn_matrix[ibatch][attn_matrix[ibatch] > 0]))) - plt.suptitle(attn_name) - tensorboard_writer.add_figure(attn_name, fig, global_step=epoch) + try: + tensorboard_writer.add_histogram("pt_target", torch.clamp(batch.ytarget[batch.mask][:, 2], -10, 10), global_step=epoch) + tensorboard_writer.add_histogram("pt_pred", torch.clamp(ypred_raw[2][batch.mask][:, 0], -10, 10), global_step=epoch) + ratio = (ypred_raw[2][batch.mask][:, 0] / batch.ytarget[batch.mask][:, 2])[batch.ytarget[batch.mask][:, 0] != 0] + tensorboard_writer.add_histogram("pt_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) + + tensorboard_writer.add_histogram("eta_target", torch.clamp(batch.ytarget[batch.mask][:, 3], -10, 10), global_step=epoch) + tensorboard_writer.add_histogram("eta_pred", torch.clamp(ypred_raw[2][batch.mask][:, 1], -10, 10), global_step=epoch) + ratio = (ypred_raw[2][batch.mask][:, 1] / batch.ytarget[batch.mask][:, 3])[batch.ytarget[batch.mask][:, 0] != 0] + tensorboard_writer.add_histogram("eta_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) + + tensorboard_writer.add_histogram("sphi_target", torch.clamp(batch.ytarget[batch.mask][:, 4], -10, 10), global_step=epoch) + tensorboard_writer.add_histogram("sphi_pred", torch.clamp(ypred_raw[2][batch.mask][:, 2], -10, 10), global_step=epoch) + ratio = (ypred_raw[2][batch.mask][:, 2] / batch.ytarget[batch.mask][:, 4])[batch.ytarget[batch.mask][:, 0] != 0] + tensorboard_writer.add_histogram("sphi_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) + + tensorboard_writer.add_histogram("cphi_target", torch.clamp(batch.ytarget[batch.mask][:, 5], -10, 10), global_step=epoch) + tensorboard_writer.add_histogram("cphi_pred", torch.clamp(ypred_raw[2][batch.mask][:, 3], -10, 10), global_step=epoch) + ratio = (ypred_raw[2][batch.mask][:, 3] / batch.ytarget[batch.mask][:, 5])[batch.ytarget[batch.mask][:, 0] != 0] + tensorboard_writer.add_histogram("cphi_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) + + tensorboard_writer.add_histogram("energy_target", torch.clamp(batch.ytarget[batch.mask][:, 6], -10, 10), global_step=epoch) + tensorboard_writer.add_histogram("energy_pred", torch.clamp(ypred_raw[2][batch.mask][:, 4], -10, 10), global_step=epoch) + ratio = (ypred_raw[2][batch.mask][:, 4] / batch.ytarget[batch.mask][:, 6])[batch.ytarget[batch.mask][:, 0] != 0] + tensorboard_writer.add_histogram("energy_ratio", torch.clamp(ratio, -10, 10), global_step=epoch) + except ValueError as e: + print(e) + + try: + for attn in sorted(list(glob.glob(f"{outdir}/attn_conv_*.npz"))): + attn_name = os.path.basename(attn).split(".")[0] + attn_matrix = np.load(attn)["att"] + batch_size = min(attn_matrix.shape[0], 8) + fig, axes = plt.subplots(1, batch_size, figsize=((batch_size * 3, 1 * 3))) + if isinstance(axes, matplotlib.axes._axes.Axes): + axes = [axes] + for ibatch in range(batch_size): + plt.sca(axes[ibatch]) + # plot the attention matrix of the first event in the batch + plt.imshow(attn_matrix[ibatch].T, cmap="hot", norm=matplotlib.colors.LogNorm()) + plt.xticks([]) + plt.yticks([]) + plt.colorbar() + plt.title("event {}, m={:.2E}".format(ibatch, np.mean(attn_matrix[ibatch][attn_matrix[ibatch] > 0]))) + plt.suptitle(attn_name) + tensorboard_writer.add_figure(attn_name, fig, global_step=epoch) + except ValueError as e: + print(e) diff --git a/mlpf/model/training.py b/mlpf/model/training.py index 4c7c4f3ba..b6412fe19 100644 --- a/mlpf/model/training.py +++ b/mlpf/model/training.py @@ -67,57 +67,26 @@ def configure_model_trainable(model: MLPF, trainable: Union[str, List[str]], is_ model.eval() -def train_step(batch, model, optimizer, lr_schedule, loss_fn): - """Single training step logic - - Args: - batch: The input batch data - model: The neural network model - optimizer: The optimizer - lr_schedule: Learning rate scheduler - loss_fn: Loss function to use - - Returns: - dict: Dictionary containing all computed losses with gradient detached - """ +def model_step(batch, model, loss_fn): ypred_raw = model(batch.X, batch.mask) ypred = unpack_predictions(ypred_raw) ytarget = unpack_target(batch.ytarget, model) - loss_opt, losses_detached = loss_fn(ytarget, ypred, batch) + return loss_opt, losses_detached, ypred_raw, ypred, ytarget + +def optimizer_step(model, loss_opt, optimizer, lr_schedule, scaler): # Clear gradients for param in model.parameters(): param.grad = None # Backward pass and optimization - loss_opt.backward() - optimizer.step() + scaler.scale(loss_opt).backward() + scaler.step(optimizer) + scaler.update() if lr_schedule: lr_schedule.step() - return losses_detached - - -def eval_step(batch, model, loss_fn): - """Single evaluation step logic - - Args: - batch: The input batch data - model: The neural network model - loss_fn: Loss function to use - - Returns: - tuple: (losses dict, predictions dict, targets dict) - """ - with torch.no_grad(): - ypred_raw = model(batch.X, batch.mask) - ypred = unpack_predictions(ypred_raw) - ytarget = unpack_target(batch.ytarget, model) - _, losses_detached = loss_fn(ytarget, ypred, batch) - - return losses_detached, ypred_raw, ypred, ytarget - def train_epoch( rank: Union[int, str], @@ -133,6 +102,7 @@ def train_epoch( checkpoint_dir="", device_type="cuda", dtype=torch.float32, + scaler=None, ): """Run one training epoch @@ -167,7 +137,9 @@ def train_epoch( batch = batch.to(rank, non_blocking=True) with torch.autocast(device_type=device_type, dtype=dtype, enabled=device_type == "cuda"): - loss = train_step(batch, model, optimizer, lr_schedule, mlpf_loss) + loss_opt, loss, _, _, _ = model_step(batch, model, mlpf_loss) + + optimizer_step(model, loss_opt, optimizer, lr_schedule, scaler) # Accumulate losses for loss_name in loss: @@ -191,14 +163,14 @@ def train_epoch( comet_experiment.log_metric("learning_rate", lr_schedule.get_last_lr(), step=step) # Average losses across steps - num_steps = len(train_loader) + num_steps = torch.tensor(float(len(train_loader)), device=rank, dtype=torch.float32) if world_size > 1: torch.distributed.all_reduce(num_steps) for loss_name in epoch_loss: if world_size > 1: torch.distributed.all_reduce(epoch_loss[loss_name]) - epoch_loss[loss_name] = epoch_loss[loss_name] / num_steps + epoch_loss[loss_name] = epoch_loss[loss_name].cpu().item() / num_steps.cpu().item() if world_size > 1: dist.barrier() @@ -261,7 +233,8 @@ def eval_epoch( set_save_attention(model, outdir, False) with torch.autocast(device_type=device_type, dtype=dtype, enabled=device_type == "cuda"): - loss, ypred_raw, ypred, ytarget = eval_step(batch, model, mlpf_loss) + with torch.no_grad(): + loss_opt, loss, ypred_raw, ypred, ytarget = model_step(batch, model, mlpf_loss) # Update confusion matrices cm_X_target += sklearn.metrics.confusion_matrix( @@ -297,14 +270,14 @@ def eval_epoch( ) # Average losses across steps - num_steps = len(valid_loader) + num_steps = torch.tensor(float(len(valid_loader)), device=rank, dtype=torch.float32) if world_size > 1: torch.distributed.all_reduce(num_steps) for loss_name in epoch_loss: if world_size > 1: torch.distributed.all_reduce(epoch_loss[loss_name]) - epoch_loss[loss_name] = epoch_loss[loss_name] / num_steps + epoch_loss[loss_name] = epoch_loss[loss_name].cpu().item() / num_steps.cpu().item() if world_size > 1: dist.barrier() @@ -383,6 +356,8 @@ def train_all_epochs( stale_epochs = torch.tensor(0, device=rank) best_val_loss = float("inf") + scaler = torch.amp.GradScaler() + for epoch in range(start_epoch, num_epochs + 1): epoch_start_time = time.time() @@ -401,6 +376,7 @@ def train_all_epochs( checkpoint_dir=checkpoint_dir, device_type=device_type, dtype=dtype, + scaler=scaler, ) train_time = time.time() - epoch_start_time @@ -430,21 +406,6 @@ def train_all_epochs( # Handle checkpointing and early stopping on rank 0 if (rank == 0) or (rank == "cpu"): - - # evaluate the model at this epoch on test datasets, make plots, track metrics - testdir_name = f"_epoch_{epoch}" - for sample in config["test_dataset"]: - run_test(rank, world_size, config, outdir, model, sample, testdir_name, dtype) - plot_metrics = make_plots(outdir, sample, config["dataset"], testdir_name, config["ntest"]) - - # track the following jet metrics in tensorboard - for k in ["med", "iqr", "match_frac"]: - tensorboard_writer_valid.add_scalar( - "epoch/{}/jet_ratio/jet_ratio_target_to_pred_pt/{}".format(sample, k), - plot_metrics["jet_ratio"]["jet_ratio_target_to_pred_pt"][k], - epoch, - ) - # Log learning rate tensorboard_writer_train.add_scalar("epoch/learning_rate", lr_schedule.get_last_lr()[0], epoch) @@ -504,6 +465,20 @@ def train_all_epochs( tensorboard_writer_train.flush() tensorboard_writer_valid.flush() + # evaluate the model at this epoch on test datasets, make plots, track metrics + testdir_name = f"_epoch_{epoch}" + for sample in config["enabled_test_datasets"]: + run_test(rank, world_size, config, outdir, model, sample, testdir_name, dtype) + plot_metrics = make_plots(outdir, sample, config["dataset"], testdir_name, config["ntest"]) + + # track the following jet metrics in tensorboard + for k in ["med", "iqr", "match_frac"]: + tensorboard_writer_valid.add_scalar( + "epoch/{}/jet_ratio/jet_ratio_target_to_pred_pt/{}".format(sample, k), + plot_metrics["jet_ratio"]["jet_ratio_target_to_pred_pt"][k], + epoch, + ) + # Ray training specific logging if use_ray: import ray @@ -787,14 +762,14 @@ def run(rank, world_size, config, outdir, logfile): testdir_name = "_best_weights" if config["test"]: - for sample in config["test_dataset"]: + for sample in config["enabled_test_datasets"]: run_test(rank, world_size, config, outdir, model, sample, testdir_name, dtype) # make plots only on a single machine if (rank == 0) or (rank == "cpu"): if config["make_plots"]: ntest_files = -1 - for sample in config["test_dataset"]: + for sample in config["enabled_test_datasets"]: _logger.info(f"Plotting distributions for {sample}") make_plots(outdir, sample, config["dataset"], testdir_name, ntest_files) @@ -817,8 +792,13 @@ def override_config(config: dict, args): for model in ["gnn_lsh", "attention", "attention", "mamba"]: config["model"][model]["num_convs"] = args.num_convs + config["enabled_test_datasets"] = list(config["test_dataset"].keys()) if len(args.test_datasets) != 0: - config["test_dataset"] = args.test_datasets + config["enabled_test_datasets"] = args.test_datasets + + config["train"] = args.train + config["test"] = args.test + config["make_plots"] = args.make_plots return config diff --git a/mlpf/plotting/plot_utils.py b/mlpf/plotting/plot_utils.py index 8a79cf5e3..cf817a1d7 100644 --- a/mlpf/plotting/plot_utils.py +++ b/mlpf/plotting/plot_utils.py @@ -136,6 +136,7 @@ def get_class_names(sample_name): "cms_pf_single_pi0": r"single neutral pion particle gun events", "cms_pf_single_proton": r"single proton particle gun events", "cms_pf_single_tau": r"single tau particle gun events", + "cms_pf_single_k0": r"single K0 particle gun events", "cms_pf_sms_t1tttt": r"sms t1tttt events", } @@ -418,7 +419,7 @@ def compute_3dmomentum_and_ratio(yvals): } -def save_img(outfile, epoch, cp_dir=None, comet_experiment=None): +def save_img(outfile, epoch=None, cp_dir=None, comet_experiment=None): if cp_dir: image_path = str(cp_dir / outfile) plt.savefig(image_path, dpi=100, bbox_inches="tight") diff --git a/mlpf/timing.py b/mlpf/timing.py index 4f4d6cdc1..bbd3cb369 100644 --- a/mlpf/timing.py +++ b/mlpf/timing.py @@ -77,11 +77,11 @@ def get_mem_mb(use_gpu): sess_options.add_session_config_entry("session.intra_op.allow_spinning", "1") onnx_sess = rt.InferenceSession(args.model, sess_options, providers=EP_list) - # warmup mem_onnx = get_mem_mb(use_gpu) print("mem_onnx", mem_onnx) + # warmup X = np.array(np.random.randn(batch_size, bin_size, num_features), getattr(np, args.input_dtype)) for i in range(10): onnx_sess.run(None, {"Xfeat_normed": X, "mask": (X[..., 0] != 0).astype(np.float32)}) @@ -103,9 +103,12 @@ def get_mem_mb(use_gpu): # transfer data to GPU, run model, transfer data back t0 = time.time() - # pred_onx = onnx_sess.run(None, {"Xfeat_normed": X, "l_mask_": X[..., 0]==0}) - pred_onx = onnx_sess.run(None, {"Xfeat_normed": X, "mask": (X[..., 0] != 0).astype(np.float32)}) - t1 = time.time() + try: + onnx_sess.run(None, {"Xfeat_normed": X, "mask": (X[..., 0] != 0).astype(np.float32)}) + t1 = time.time() + except Exception as e: + print(e) + t1 = t0 dt = (t1 - t0) / batch_size times.append(dt) diff --git a/notebooks/cms/cms-runtimes.ipynb b/notebooks/cms/cms-runtimes.ipynb index 181e4e579..137ad3b52 100644 --- a/notebooks/cms/cms-runtimes.ipynb +++ b/notebooks/cms/cms-runtimes.ipynb @@ -9,7 +9,15 @@ "source": [ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", - "import numpy as np" + "import numpy as np\n", + "\n", + "import mplhep\n", + "mplhep.style.use(\"CMS\")\n", + "\n", + "import sys\n", + "sys.path += [\"../../mlpf/\"]\n", + "sys.path += [\"../../mlpf/plotting/\"]\n", + "from plot_utils import cms_label" ] }, { @@ -19,16 +27,27 @@ "metadata": {}, "outputs": [], "source": [ - "s1 = \"\"\"\n", - "timing/gpu_fp32_fused.txt:Nelem=2560 mean_time=6.99 ms stddev_time=2.89 ms mem_used=1678 MB\n", - "timing/gpu_fp32_fused.txt:Nelem=5120 mean_time=16.59 ms stddev_time=0.15 ms mem_used=1946 MB\n", - "timing/gpu_fp32_fused.txt:Nelem=10240 mean_time=53.13 ms stddev_time=0.23 ms mem_used=1946 MB\n", + "s_fused_bs2 = \"\"\"\n", + "timing/gpu_fp32_fused_bs2.txt:Nelem=2560 mean_time=20.38 ms stddev_time=0.08 ms mem_used=1268 MB\n", + "timing/gpu_fp32_fused_bs2.txt:Nelem=5120 mean_time=49.49 ms stddev_time=0.06 ms mem_used=1537 MB\n", + "timing/gpu_fp32_fused_bs2.txt:Nelem=10240 mean_time=135.40 ms stddev_time=0.07 ms mem_used=2074 MB\n", + "\"\"\"\n", + "\n", + "s_fused_bs4 = \"\"\"\n", + "timing/gpu_fp32_fused_bs4.txt:Nelem=2560 mean_time=19.93 ms stddev_time=0.03 ms mem_used=1537 MB\n", + "timing/gpu_fp32_fused_bs4.txt:Nelem=5120 mean_time=48.39 ms stddev_time=0.04 ms mem_used=2074 MB\n", + "timing/gpu_fp32_fused_bs4.txt:Nelem=10240 mean_time=134.42 ms stddev_time=0.06 ms mem_used=3147 MB\n", + "\"\"\"\n", + "\n", + "s_fused_bs1 = \"\"\"\n", + "timing/gpu_fp32_fused.txt:Nelem=2560 mean_time=18.99 ms stddev_time=0.10 ms mem_used=1134 MB\n", + "timing/gpu_fp32_fused.txt:Nelem=5120 mean_time=50.43 ms stddev_time=0.10 ms mem_used=1268 MB\n", + "timing/gpu_fp32_fused.txt:Nelem=10240 mean_time=137.60 ms stddev_time=0.14 ms mem_used=1537 MB\n", "\"\"\"\n", "\n", - "s2 = \"\"\"\n", - "timing/gpu_fp32_unfused.txt:Nelem=2560 mean_time=39.31 ms stddev_time=1.73 ms mem_used=3817 MB\n", - "timing/gpu_fp32_unfused.txt:Nelem=5120 mean_time=130.18 ms stddev_time=6.52 ms mem_used=12407 MB\n", - "timing/gpu_fp32_unfused.txt:Nelem=10240 mean_time=465.09 ms stddev_time=25.82 ms mem_used=46766 MB\n", + "s_unfused = \"\"\"\n", + "timing/gpu_fp32_unfused.txt:Nelem=2560 mean_time=79.48 ms stddev_time=0.35 ms mem_used=2208 MB\n", + "timing/gpu_fp32_unfused.txt:Nelem=5120 mean_time=289.87 ms stddev_time=1.72 ms mem_used=6503 MB\n", "\"\"\"" ] }, @@ -66,8 +85,10 @@ "metadata": {}, "outputs": [], "source": [ - "data_fused = parse_str(s1)\n", - "data_unfused = parse_str(s2)" + "data_fused_bs1 = parse_str(s_fused_bs1)\n", + "data_fused_bs2 = parse_str(s_fused_bs2)\n", + "data_fused_bs4 = parse_str(s_fused_bs4)\n", + "data_unfused = parse_str(s_unfused)" ] }, { @@ -77,13 +98,18 @@ "metadata": {}, "outputs": [], "source": [ - "plt.errorbar(data_unfused[\"Nelem\"], data_unfused[\"mean_time\"], yerr=data_unfused[\"stddev_time\"], marker=\"o\", label=\"ONNX unfused attention\")\n", - "plt.errorbar(data_fused[\"Nelem\"], data_fused[\"mean_time\"], yerr=data_fused[\"stddev_time\"], marker=\"o\", label=\"ONNX fused attention\")\n", - "plt.xticks(data_fused[\"Nelem\"])\n", + "plt.figure()\n", + "ax = plt.axes()\n", + "plt.errorbar(data_unfused[\"Nelem\"], data_unfused[\"mean_time\"], yerr=data_unfused[\"stddev_time\"], marker=\"o\", label=\"unfused\")\n", + "plt.errorbar(data_fused_bs1[\"Nelem\"], data_fused_bs1[\"mean_time\"], yerr=data_fused_bs1[\"stddev_time\"], marker=\"^\", label=\"fused, bs1\")\n", + "plt.errorbar(data_fused_bs4[\"Nelem\"], data_fused_bs4[\"mean_time\"], yerr=data_fused_bs4[\"stddev_time\"], marker=\"v\", label=\"fused, bs4\")\n", + "plt.xticks(data_fused_bs1[\"Nelem\"])\n", "plt.ylabel(\"Runtime per event [ms]\")\n", - "plt.xlabel(\"Elements per event\")\n", - "plt.title(\"MLPF runtime, 2x6 layers, ONNX backend, A100\")\n", - "plt.legend(loc=\"best\")" + "plt.xlabel(\"Tracks/clusters per event\")\n", + "plt.legend(loc=1, title=\"MLPF-2x3x256\\nONNX backend\\nA100-1g.10gb\")\n", + "plt.ylim(0,400)\n", + "cms_label(ax)\n", + "plt.savefig(\"runtime.pdf\")" ] }, { @@ -93,16 +119,21 @@ "metadata": {}, "outputs": [], "source": [ - "plt.errorbar(data_unfused[\"Nelem\"], data_unfused[\"mem_used\"], marker=\"o\", label=\"ONNX unfused attention\")\n", - "plt.errorbar(data_fused[\"Nelem\"], data_fused[\"mem_used\"], marker=\"o\", label=\"ONNX fused attention\")\n", - "plt.xticks(data_fused[\"Nelem\"])\n", - "plt.ylabel(\"GPU memory used [MB]\")\n", - "plt.xlabel(\"Elements per event\")\n", - "plt.title(\"MLPF memory, 2x6 layers, ONNX backend, A100\")\n", - "plt.yscale(\"log\")\n", - "plt.legend(loc=\"best\")\n", - "ytick = [1000,2000,10000,20000,40000]\n", - "plt.yticks(ytick, ytick)" + "plt.figure()\n", + "ax = plt.axes()\n", + "plt.errorbar(data_unfused[\"Nelem\"], data_unfused[\"mem_used\"], marker=\"o\", label=\"unfused\")\n", + "plt.errorbar(data_fused_bs1[\"Nelem\"], data_fused_bs1[\"mem_used\"], marker=\"^\", label=\"fused, bs1\")\n", + "plt.errorbar(data_fused_bs4[\"Nelem\"], data_fused_bs4[\"mem_used\"]/4, marker=\"v\", label=\"fused, bs4\")\n", + "plt.xticks(data_fused_bs1[\"Nelem\"])\n", + "plt.ylabel(\"GPU memory per event [MB]\")\n", + "plt.xlabel(\"Tracks/clusters per event\")\n", + "plt.axhline(10200, color=\"red\", ls=\"--\", label=\"GPU limit\")\n", + "#plt.yscale(\"log\")\n", + "#ytick = [256,512,1024,2048,4096,8192]\n", + "#plt.yticks(ytick, ytick)\n", + "plt.legend(loc=1, title=\"MLPF-2x3x256\\nONNX backend\\nA100-1g.10gb\")\n", + "cms_label(ax)\n", + "plt.savefig(\"memory.pdf\")" ] }, { @@ -241,7 +272,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/notebooks/cms/cms-validate-onnx.ipynb b/notebooks/cms/cms-validate-onnx.ipynb index c5c84920e..4519dfea6 100644 --- a/notebooks/cms/cms-validate-onnx.ipynb +++ b/notebooks/cms/cms-validate-onnx.ipynb @@ -111,15 +111,15 @@ "outputs": [], "source": [ "#tfds datasets are here:\n", - "data_dir = \"/home/joosep/tensorflow_datasets/\"\n", + "data_dir = \"/scratch/persistent/joosep/tensorflow_datasets/\"\n", "dataset = \"cms_pf_ttbar_nopu\"\n", "\n", "#model checkpoints are here:\n", - "outdir = \"../../experiments/pyg-cms_20241101_090645_682892/\"\n", + "outdir = \"../../experiments/pyg-cms_20241212_101648_120237/\"\n", "\n", "#Load model weights from existing training\n", "model_state = torch.load(\n", - " outdir + \"/checkpoints/checkpoint-18-2.778778.pth\", map_location=torch.device(\"cpu\")\n", + " outdir + \"/checkpoints/checkpoint-05-3.498507.pth\", map_location=torch.device(\"cpu\")\n", ")\n", "with open(f\"{outdir}/model_kwargs.pkl\", \"rb\") as f:\n", " model_kwargs = pkl.load(f)\n", @@ -128,7 +128,7 @@ "NUM_HEADS = model_kwargs[\"num_heads\"]\n", "\n", "#set this to cuda if you are running the notebook on a GPU, otherwise use cpu\n", - "torch_device = torch.device(\"cuda\")" + "torch_device = torch.device(\"cpu\")" ] }, { @@ -143,7 +143,7 @@ "model.eval()\n", "model.load_state_dict(model_state[\"model_state_dict\"])\n", "\n", - "model = model.to(device=\"cuda\")\n", + "model = model.to(device=torch_device)\n", "\n", "#disable attention context manager (disable flash attention)\n", "for conv in model.conv_id + model.conv_reg:\n", @@ -802,7 +802,7 @@ "builder = tfds.builder(dataset, data_dir=data_dir)\n", "ds = builder.as_data_source(split=\"test\")\n", "\n", - "max_events = 10000\n", + "max_events = 100\n", "events_per_batch = 1\n", "inds = range(0, max_events, events_per_batch)\n", "\n", diff --git a/notebooks/cms/cms-validate-postprocessing.ipynb b/notebooks/cms/cms-validate-postprocessing.ipynb index a184e88e6..c45b98afb 100644 --- a/notebooks/cms/cms-validate-postprocessing.ipynb +++ b/notebooks/cms/cms-validate-postprocessing.ipynb @@ -22,6 +22,7 @@ "import tqdm\n", "import fastjet\n", "import vector\n", + "from pathlib import Path\n", "\n", "mplhep.style.use(\"CMS\")" ] @@ -43,7 +44,18 @@ "from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS\n", "from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS\n", "from plot_utils import cms_label, sample_label\n", - "from plot_utils import pid_to_text" + "from plot_utils import pid_to_text\n", + "from plot_utils import save_img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f648184-dbf1-4e59-931a-ac99077e7dbd", + "metadata": {}, + "outputs": [], + "source": [ + "!ls /media/joosep/data/20240823_simcluster/pu55to75/" ] }, { @@ -56,11 +68,13 @@ "#https://jpata.web.cern.ch/jpata/mlpf/cms/20240823_simcluster/nopu/TTbar_14TeV_TuneCUETP8M1_cfi/raw\n", "\n", "sample = \"QCDForPF_14TeV_TuneCUETP8M1_cfi\"\n", + "sample_name = \"cms_pf_qcd\"\n", + "sample_pid = 11\n", "\n", "pickle_data = sum(\n", " [\n", " pickle.load(bz2.BZ2File(f, \"r\"))\n", - " for f in tqdm.tqdm(sorted(list(glob.glob(\"/local/joosep/mlpf/cms/20240823_simcluster/nopu/{}/raw/*.pkl.bz2\".format(sample))))[:100])\n", + " for f in tqdm.tqdm(sorted(list(glob.glob(\"/media/joosep/data/20240823_simcluster/pu55to75/{}/raw/*.pkl.bz2\".format(sample))))[:100])\n", " ],\n", " [],\n", ")\n", @@ -159,6 +173,16 @@ "arrs_flat" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c7d57d1-f949-438d-82d1-5e30526ba047", + "metadata": {}, + "outputs": [], + "source": [ + "np.unique(awkward.flatten(arrs_awk[\"pythia\"][\"pid\"]), return_counts=True)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -218,11 +242,15 @@ "metadata": {}, "outputs": [], "source": [ - "plt.figure(figsize=(5,5))\n", - "plt.hist(awkward.flatten(arrs_awk[\"ytarget\"][\"ispu\"]), bins=np.linspace(0,1,21), histtype=\"step\")\n", + "plt.figure()\n", + "ax = plt.axes()\n", + "plt.hist(awkward.flatten(arrs_awk[\"ytarget\"][\"ispu\"]), bins=np.linspace(0,1,101), histtype=\"step\")\n", "plt.yscale(\"log\")\n", "#plt.xscale(\"log\")\n", - "plt.xlabel(\"target isPU flag\")" + "plt.xlabel(\"PU fraction\")\n", + "cms_label(ax)\n", + "sample_label(ax, sample_name)\n", + "save_img(\"{}_pu_frac.png\".format(sample), cp_dir=Path(\"./\"))" ] }, { @@ -292,7 +320,7 @@ "metadata": {}, "outputs": [], "source": [ - "b = np.logspace(-3,3,500)\n", + "b = np.logspace(-3,4,500)\n", "plt.figure(figsize=(5,5))\n", "plt.hist(awkward.flatten(arrs_awk[\"pythia\"][\"pt\"]), bins=b, label=\"Pythia\", histtype=\"step\")\n", "plt.hist(awkward.flatten(arrs_awk[\"ytarget\"][\"pt\"]), bins=b, label=\"MLPF target\", histtype=\"step\")\n", @@ -304,16 +332,22 @@ { "cell_type": "code", "execution_count": null, - "id": "bc98c23d-75f7-4165-ba12-e374c5221974", + "id": "e5426989-93f9-44d9-b9d6-9918ef8b358c", "metadata": {}, "outputs": [], "source": [ - "b = np.linspace(-8,8,500)\n", - "plt.figure(figsize=(5,5))\n", - "plt.hist(awkward.flatten(arrs_awk[\"pythia\"][\"eta\"]), bins=b, label=\"Pythia\", histtype=\"step\")\n", - "plt.hist(awkward.flatten(arrs_awk[\"ytarget\"][\"eta\"]), bins=b, label=\"MLPF target\", histtype=\"step\")\n", + "fig = plt.figure()\n", + "ax = plt.axes()\n", + "b = np.logspace(-3,4,500)\n", + "plt.hist(awkward.sum(arrs_awk[\"pythia\"][\"pt\"][arrs_awk[\"pythia\"][\"pid\"]==sample_pid], axis=1), bins=b, label=\"Pythia\", histtype=\"step\")\n", + "plt.hist(awkward.sum(arrs_awk[\"ytarget\"][\"pt\"][arrs_awk[\"ytarget\"][\"pid\"]==sample_pid], axis=1), bins=b, label=\"MLPF target\", histtype=\"step\")\n", + "plt.xscale(\"log\")\n", "plt.yscale(\"log\")\n", - "plt.legend(loc=\"best\")" + "plt.legend(loc=\"best\")\n", + "plt.xlabel(\"Sum $p_T$ [GeV]\")\n", + "cms_label(ax)\n", + "sample_label(ax, sample_name)\n", + "save_img(\"{}_particle_sumpt.png\".format(sample), cp_dir=Path(\"./\"))" ] }, { @@ -331,7 +365,7 @@ "metadata": {}, "outputs": [], "source": [ - "b = np.logspace(0,3,401)\n", + "b = np.logspace(0,4,401)\n", "plt.figure(figsize=(5,5))\n", "plt.hist(np.abs(awkward.flatten(jets_coll[\"cmssw\"].pt)), bins=b, histtype=\"step\", label=\"genJet\");\n", "plt.hist(np.abs(awkward.flatten(jets_coll[\"ytarget\"].pt)), bins=b, histtype=\"step\", label=\"MLPF target\");\n", @@ -458,7 +492,8 @@ " plt.axvline(1.0, color=\"black\", ls=\"--\", lw=0.5)\n", "\n", "def plot_jet_ratio_ptcut2(ptcut1, ptcut2):\n", - " plt.figure(figsize=(5,5))\n", + " fig = plt.figure()\n", + " ax = plt.axes()\n", " b = np.linspace(0.5,1.5,100)\n", "\n", " pt = jets_coll[\"cmssw\"][cmssw_to_ytarget[\"cmssw\"]].pt\n", @@ -477,7 +512,11 @@ "\n", " plt.xlabel(\"jet $p_T$ / genjet $p_T$\")\n", " plt.legend(loc=1, fontsize=12)\n", - " plt.axvline(1.0, color=\"black\", ls=\"--\", lw=0.5)" + " plt.axvline(1.0, color=\"black\", ls=\"--\", lw=0.5)\n", + " cms_label(ax)\n", + " sample_label(ax, sample_name)\n", + " plt.yscale(\"log\")\n", + " save_img(\"{}_jet_pt_ratio.png\".format(sample), cp_dir=Path(\"./\"))" ] }, { @@ -499,7 +538,7 @@ "outputs": [], "source": [ "plot_jet_ratio_ptcut2(0,1000)\n", - "plt.ylim(0,75000)" + "#plt.ylim(0,75000)" ] }, { @@ -765,7 +804,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/notebooks/cms/cms-validate-root-vs-postprocessing.ipynb b/notebooks/cms/cms-validate-root-vs-postprocessing.ipynb index d6f1fe0e7..4f347bb81 100644 --- a/notebooks/cms/cms-validate-root-vs-postprocessing.ipynb +++ b/notebooks/cms/cms-validate-root-vs-postprocessing.ipynb @@ -23,6 +23,7 @@ "import fastjet\n", "import vector\n", "import uproot\n", + "from pathlib import Path\n", "\n", "import pickle\n", "from functools import reduce\n", @@ -36,9 +37,10 @@ "sys.path += [\"../../mlpf/plotting/\"]\n", "\n", "from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS\n", - "from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS\n", - "from plot_utils import cms_label\n", - "from plot_utils import pid_to_text" + "from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS, EVALUATION_DATASET_NAMES\n", + "from plot_utils import cms_label, sample_label\n", + "from plot_utils import pid_to_text\n", + "from plot_utils import save_img" ] }, { @@ -72,7 +74,7 @@ "metadata": {}, "outputs": [], "source": [ - "files = [pickle.load(open(fn, \"rb\")) for fn in glob.glob(\"../../plots2/out*.pkl\")]\n", + "files = [pickle.load(open(fn, \"rb\")) for fn in glob.glob(\"/media/joosep/data/mlpf/plots_cms//out*.pkl\")]\n", "ret = reduce(add_results, files, {})\n", "\n", "sample_keys = sorted(set([\"/\".join(k.split(\"/\")[0:2]) for k in ret.keys() if not k.startswith(\"combined\")]))\n", @@ -155,7 +157,8 @@ " plt.ylim(1, 1e8)\n", " cms_label(ax)\n", " sample_label(ax, sample)\n", - " plt.xlabel(\"particle $p_T$ [GeV]\")" + " plt.xlabel(\"particle $p_T$ [GeV]\")\n", + " save_img(\"{}_particles_pt.png\".format(sample.replace(\"/\", \"_\")), cp_dir=Path(\"./\"))" ] }, { @@ -179,7 +182,8 @@ " plt.ylim(1, 1e7)\n", " cms_label(ax)\n", " sample_label(ax, sample, str(pid))\n", - " plt.xlabel(\"particle $p_T$ [GeV]\")" + " plt.xlabel(\"particle $p_T$ [GeV]\")\n", + " save_img(\"{}_particle_{}_pt.png\".format(sample.replace(\"/\", \"_\"), pid), cp_dir=Path(\"./\"))" ] }, { @@ -202,7 +206,8 @@ " sample_label(ax, sample)\n", " plt.yscale(\"log\")\n", " plt.ylim(1,1e8)\n", - " plt.xlabel(\"jet $p_T$ [GeV]\")" + " plt.xlabel(\"jet $p_T$ [GeV]\")\n", + " save_img(\"{}_jet_pt.png\".format(sample.replace(\"/\", \"_\")), cp_dir=Path(\"./\"))" ] }, { @@ -226,7 +231,8 @@ " sample_label(ax, sample)\n", " plt.yscale(\"log\")\n", " plt.ylim(1,1e8)\n", - " plt.xlabel(\"jet response $p_T/p_{T,Pythia}$\")" + " plt.xlabel(\"jet response $p_T/p_{T,Pythia}$\")\n", + " save_img(\"{}_jet_response.png\".format(sample.replace(\"/\", \"_\")), cp_dir=Path(\"./\"))" ] }, { @@ -250,8 +256,17 @@ " cms_label(ax)\n", " sample_label(ax, sample)\n", " plt.ylim(1,1e8)\n", - " plt.xlabel(\"MET [GeV]\")" + " plt.xlabel(\"MET [GeV]\")\n", + " save_img(\"{}_met.png\".format(sample.replace(\"/\", \"_\")), cp_dir=Path(\"./\"))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79d33756-f294-4f23-a26f-e51bb9930f79", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -270,7 +285,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/notebooks/cms/cms-validate-root.ipynb b/notebooks/cms/cms-validate-root.ipynb index 0470115c3..508aadf46 100644 --- a/notebooks/cms/cms-validate-root.ipynb +++ b/notebooks/cms/cms-validate-root.ipynb @@ -73,7 +73,7 @@ "#Download from https://jpata.web.cern.ch/jpata/mlpf/cms/20240823_simcluster/nopu/\n", "\n", "tts = [\n", - " load_tree(uproot.open(fn)[\"pfana/pftree\"]) for fn in glob.glob(\"/local/joosep/mlpf/cms/20240823_simcluster/nopu/QCDForPF_14TeV_TuneCUETP8M1_cfi/root/pfntuple_10000*.root\")\n", + " load_tree(uproot.open(fn)[\"pfana/pftree\"]) for fn in glob.glob(\"/media/joosep/data/20240823_simcluster/nopu/SingleElectronFlatPt1To1000_pythia8_cfi/root/pfntuple_*.root\")\n", "]\n", "tts = awkward.concatenate(tts, axis=0)" ] @@ -128,6 +128,26 @@ "plt.legend()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "355591fd-9117-405c-b7b9-6e96a48ab94b", + "metadata": {}, + "outputs": [], + "source": [ + "particles_pythia[particles_pythia[\"gen_status\"]==1][\"gen_pt\"][:10]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99f4bd9e-bed6-481d-9184-44ea678a4640", + "metadata": {}, + "outputs": [], + "source": [ + "particles_cp[\"caloparticle_pt\"][:10]" + ] + }, { "cell_type": "code", "execution_count": null, @@ -145,6 +165,28 @@ "plt.legend()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "55e1ab80-d2a4-4c66-8c41-69be13f3a4e3", + "metadata": {}, + "outputs": [], + "source": [ + "awkward.flatten(particles_pythia[mask_pythia_nonu & (pid1==pid)][\"gen_pt\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5628e702-7b87-4d72-9a8a-22e8fd140c7e", + "metadata": {}, + "outputs": [], + "source": [ + "b = np.logspace(-3, 5, 100)\n", + "plt.hist(awkward.flatten(particles_cp[mask_cp & (pid2==pid)][\"caloparticle_pt\"]), bins=b, label=\"CaloParticle\", histtype=\"step\")\n", + "plt.xscale(\"log\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -170,12 +212,11 @@ " plt.hist(awkward.flatten(particles_cp[mask_cp & (pid2==pid)][\"caloparticle_pt\"]), bins=b, label=\"CaloParticle\", histtype=\"step\")\n", " \n", " plt.xscale(\"log\")\n", - " plt.yscale(\"log\")\n", + " #plt.yscale(\"log\")\n", " plt.xlabel(\"Particle $p_T$ [GeV]\")\n", " plt.legend(fontsize=8)\n", " plt.title(pid)\n", " iax += 1\n", - " plt.axvline(0.3)\n", "plt.tight_layout()" ] }, @@ -554,7 +595,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/notebooks/cms/cmssw-validation.ipynb b/notebooks/cms/cmssw-validation.ipynb index 3209416a7..d80482db0 100644 --- a/notebooks/cms/cmssw-validation.ipynb +++ b/notebooks/cms/cmssw-validation.ipynb @@ -41,6 +41,20 @@ "import jet_utils" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c860e1e6-fb1f-4a17-8031-f6ae3f744659", + "metadata": {}, + "outputs": [], + "source": [ + "# ev = uproot.open(\"/local/joosep/mlpf/results/cms/CMSSW_14_1_0_74d149_btvnano/TTbar_noPU_mlpf/step3_NANO_jme_1.root\").get(\"Events\")\n", + "# for br in ev.branches:\n", + "# brname = br.name\n", + "# if \"FatJet\" in brname:\n", + "# print(brname)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -55,26 +69,22 @@ " h1[:] = np.sum(h1.values()) - np.cumsum(h1)\n", " return h1\n", "\n", - "\n", - "def load_pickle(fn, particles=False):\n", - " d = pickle.load(open(fn, \"rb\"))\n", - " ret = []\n", - " print(fn, len(d))\n", - " for it in d:\n", - " d = {\n", - " \"slimmedGenJets\": it[\"slimmedGenJets\"],\n", - " \"slimmedJetsPuppi\": it[\"slimmedJetsPuppi\"],\n", - " \"slimmedJets\": it[\"slimmedJets\"],\n", - " \"genMetTrue\": it[\"genMetTrue\"],\n", - " \"slimmedMETsPuppi\": it[\"slimmedMETsPuppi\"],\n", - " \"slimmedMETs\": it[\"slimmedMETs\"],\n", - " }\n", - " if particles:\n", - " d[\"prunedGenParticles\"] = it[\"prunedGenParticles\"]\n", - " d[\"packedPFCandidates\"] = it[\"packedPFCandidates\"]\n", - " ret.append(d)\n", - " return ret\n", - "\n", + "def load_nano(fn):\n", + " print(fn)\n", + " tt = uproot.open(fn).get(\"Events\")\n", + " ret = {}\n", + " for k in [\n", + " \"Jet_pt\", \"Jet_genJetIdx\", \"Jet_rawFactor\",\n", + " \"JetCHS_pt\", \"JetCHS_genJetIdx\", \"JetCHS_rawFactor\",\n", + " \"FatJet_pt\", \"FatJet_genJetAK8Idx\", \"FatJet_rawFactor\",\n", + " \"GenJet_pt\",\n", + " \"GenJetAK8_pt\",\n", + " \"GenMET_pt\", \"GenMET_phi\",\n", + " \"PFMET_pt\", \"PFMET_phi\",\n", + " \"RawPFMET_pt\", \"RawPFMET_phi\"\n", + " ]:\n", + " ret[k] = tt.arrays(k)[k]\n", + " return [ret, ]\n", "\n", "def varbins(*args):\n", " newlist = []\n", @@ -83,7 +93,6 @@ " newlist.append(args[-1])\n", " return np.concatenate(newlist)\n", "\n", - "\n", "def get_hist_and_merge(files, histname):\n", " hists = []\n", " for fn in files:\n", @@ -92,14 +101,11 @@ " hists.append(h)\n", " return sum(hists[1:], hists[0])\n", "\n", - "\n", "from scipy.optimize import curve_fit\n", "\n", - "\n", "def Gauss(x, a, x0, sigma):\n", " return a * np.exp(-((x - x0) ** 2) / (2 * sigma**2))\n", "\n", - "\n", "def fit_response(hist2d, bin_range):\n", " centers = []\n", " means = []\n", @@ -168,8 +174,8 @@ "metadata": {}, "outputs": [], "source": [ - "folder = \"QCD_PU\"\n", - "physics_process = \"cms_pf_qcd\"\n", + "folder = \"QCD_noPU\"\n", + "physics_process = \"cms_pf_qcd_nopu\"\n", "\n", "if folder == \"QCD_noPU\" or folder == \"QCD_PU\":\n", " jet_bins = varbins(np.linspace(10, 100, 21), np.linspace(100, 200, 5), np.linspace(200, 1000, 5))\n", @@ -191,12 +197,10 @@ "metadata": {}, "outputs": [], "source": [ - "pf_files = glob.glob(\"/local/joosep/mlpf/results/cms/CMSSW_14_1_0_pre3_fcd442/{}_pf/step3_MINI_*.pkl\".format(folder))\n", - "# mlpf_old_files = glob.glob(\"/local/joosep/mlpf/results/cms/CMSSW_14_1_0_pre3/{}_mlpf_12828b6/step3_MINI_*.pkl\".format(folder))\n", - "mlpf_new_files = glob.glob(\"/local/joosep/mlpf/results/cms/CMSSW_14_1_0_pre3_fcd442/{}_mlpf/step3_MINI_*.pkl\".format(folder))\n", + "pf_files = glob.glob(\"/local/joosep/mlpf/results/cms/CMSSW_14_1_0_74d149_btvnano/{}_pf/step3_NANO_jme_*.root\".format(folder))\n", + "mlpf_new_files = glob.glob(\"/local/joosep/mlpf/results/cms/CMSSW_14_1_0_74d149_btvnano/{}_mlpf/step3_NANO_jme_*.root\".format(folder))\n", "\n", "pf_files_d = {os.path.basename(fn): fn for fn in pf_files}\n", - "# mlpf_old_files_d = {os.path.basename(fn): fn for fn in mlpf_old_files}\n", "mlpf_new_files_d = {os.path.basename(fn): fn for fn in mlpf_new_files}" ] }, @@ -207,132 +211,146 @@ "metadata": {}, "outputs": [], "source": [ - "pf_files" + "len(pf_files), len(mlpf_new_files)" ] }, { "cell_type": "code", "execution_count": null, - "id": "beb62955-6114-4fea-b699-8ac248d05440", + "id": "296533a3-db22-4501-a60d-ea508b043ef7", "metadata": {}, "outputs": [], "source": [ - "mlpf_new_files" + "common_files = list(set(pf_files_d.keys()).intersection(set(mlpf_new_files_d.keys())))\n", + "len(common_files)" ] }, { "cell_type": "code", "execution_count": null, - "id": "296533a3-db22-4501-a60d-ea508b043ef7", + "id": "51091294-44a7-45f9-926b-17d7cefc8121", "metadata": {}, "outputs": [], "source": [ - "common_files = set(pf_files_d.keys()).intersection(set(mlpf_new_files_d.keys()))\n", - "# common_files = list(common_files.intersection(set(mlpf_old_files_d.keys())))\n", - "common_files" + "data_baseline = awkward.Array(sum([load_nano(pf_files_d[fn]) for fn in common_files], []))\n", + "data_mlpf_new = awkward.Array(sum([load_nano(mlpf_new_files_d[fn]) for fn in common_files], []))\n", + "\n", + "data_baseline = awkward.Array({k: awkward.flatten(data_baseline[k], axis=1) for k in data_baseline.fields})\n", + "data_mlpf_new = awkward.Array({k: awkward.flatten(data_mlpf_new[k], axis=1) for k in data_mlpf_new.fields})" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "51091294-44a7-45f9-926b-17d7cefc8121", + "cell_type": "markdown", + "id": "259b8438-6aa9-4e3b-bc45-2623fa066284", "metadata": {}, - "outputs": [], "source": [ - "data_baseline = sum([load_pickle(pf_files_d[fn]) for fn in common_files], [])\n", - "# data_mlpf_old = sum([load_pickle(mlpf_old_files_d[fn]) for fn in common_files], [])\n", - "data_mlpf_new = sum([load_pickle(mlpf_new_files_d[fn]) for fn in common_files], [])" + "## Jets" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "46c8acb6-b730-4214-8436-81caeaf594ba", + "cell_type": "markdown", + "id": "0cea42ff-8762-45f8-9170-3c35e7e8266d", "metadata": {}, - "outputs": [], "source": [ - "def jet_vec(data, key):\n", - " arr = awkward.from_iter([d[key] for d in data])\n", - " jet_vec = vector.awk(awkward.zip({\"pt\": arr.pt, \"eta\": arr.eta, \"phi\": arr.phi, \"energy\": arr.energy}))\n", - " return jet_vec" + "### Jet $p_T$ spectrum" ] }, { "cell_type": "code", "execution_count": null, - "id": "5eca2bb7-fbdb-41bd-b294-b04fd91189f2", + "id": "e5e66b97-2613-49f5-803f-70ca5d9b8896", "metadata": {}, "outputs": [], "source": [ - "gen_jets = jet_vec(data_baseline, \"slimmedGenJets\")\n", - "pf_jets = jet_vec(data_baseline, \"slimmedJets\")\n", - "# mlpf_old_jets = jet_vec(data_mlpf_old, \"slimmedJets\")\n", - "mlpf_new_jets = jet_vec(data_mlpf_new, \"slimmedJets\")\n", - "\n", - "gen_met_pt = awkward.flatten(awkward.from_iter([d[\"genMetTrue\"][\"pt\"] for d in data_baseline]))\n", - "pf_met_pt = awkward.flatten(awkward.from_iter([d[\"slimmedMETs\"][\"pt\"] for d in data_baseline]))\n", - "# mlpf_old_met_pt = awkward.flatten(awkward.from_iter([d[\"slimmedMETs\"][\"pt\"] for d in data_mlpf_old]))\n", - "mlpf_new_met_pt = awkward.flatten(awkward.from_iter([d[\"slimmedMETs\"][\"pt\"] for d in data_mlpf_new]))" + "data_baseline[\"JetCHS_pt_raw\"] = data_baseline[\"JetCHS_pt\"]*(1.0 - data_baseline[\"JetCHS_rawFactor\"])\n", + "data_mlpf_new[\"JetCHS_pt_raw\"] = data_mlpf_new[\"JetCHS_pt\"]*(1.0 - data_mlpf_new[\"JetCHS_rawFactor\"])\n", + "data_baseline[\"FatJet_pt_raw\"] = data_baseline[\"FatJet_pt\"]*(1.0 - data_baseline[\"FatJet_rawFactor\"])\n", + "data_mlpf_new[\"FatJet_pt_raw\"] = data_mlpf_new[\"FatJet_pt\"]*(1.0 - data_mlpf_new[\"FatJet_rawFactor\"])" ] }, { "cell_type": "code", "execution_count": null, - "id": "0bb47935-0dc3-4784-9899-61f62cce3e59", + "id": "e9e7d34c-ea0d-4e21-92ef-3da5a87ea1bc", "metadata": {}, "outputs": [], "source": [ - "def match_jets(jet1, jet2, deltar):\n", - " ind1, ind2 = jet_utils.match_jets(jet1, jet2, deltar)\n", - " return {\n", - " \"pt_1\": awkward.flatten(jet1[ind1].pt), \n", - " \"eta_1\": awkward.flatten(jet1[ind1].pt), \n", - " \"pt_2\": awkward.flatten(jet2[ind2].pt), \n", - " \"eta_2\": awkward.flatten(jet2[ind2].pt)\n", - " }" + "f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={\"height_ratios\": [3, 1]}, sharex=True)\n", + "\n", + "h0 = to_bh(awkward.flatten(data_baseline[\"GenJet_pt\"]), jet_bins)\n", + "h1 = to_bh(awkward.flatten(data_baseline[\"JetCHS_pt_raw\"]), jet_bins)\n", + "# h2 = to_bh(awkward.flatten(mlpf_old_jets.pt), jet_bins)\n", + "h3 = to_bh(awkward.flatten(data_mlpf_new[\"JetCHS_pt_raw\"]), jet_bins)\n", + "\n", + "plt.sca(a0)\n", + "x0 = mplhep.histplot(h0, histtype=\"step\", lw=2, label=\"gen\", binwnorm=1.0, ls=\"--\")\n", + "x1 = mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\", binwnorm=1.0, ls=\"-\")\n", + "# x2 = mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF old\", binwnorm=1.0, ls=\"-\")\n", + "x3 = mplhep.histplot(h3, histtype=\"step\", lw=2, label=\"MLPF\", binwnorm=1.0, ls=\"-\")\n", + "\n", + "# plt.xscale(\"log\")\n", + "plt.yscale(\"log\")\n", + "cms_label(a0)\n", + "sample_label(a0, physics_process, x=0.01, y=0.9)\n", + "a0.text(0.01, 0.92, \"AK4 CHS jets\", transform=a0.transAxes)\n", + "handles, labels = a0.get_legend_handles_labels()\n", + "handles = [x0[0].stairs, x1[0].stairs, x3[0].stairs]\n", + "a0.legend(handles, labels, loc=1)\n", + "plt.ylim(10, 10**6)\n", + "plt.ylabel(\"Number of jets / GeV\")\n", + "\n", + "plt.sca(a1)\n", + "mplhep.histplot(h0 / h0, histtype=\"step\", lw=2, ls=\"--\")\n", + "mplhep.histplot(h1 / h0, histtype=\"step\", lw=2, ls=\"-\")\n", + "# mplhep.histplot(h2 / h0, histtype=\"step\", lw=2, ls=\"-\")\n", + "mplhep.histplot(h3 / h0, histtype=\"step\", lw=2, ls=\"-\")\n", + "plt.ylim(0,5)\n", + "plt.ylabel(\"reco / gen\")\n", + "plt.xlabel(\"jet $p_T$ [GeV]\")\n", + "\n", + "plt.xscale(\"log\")\n", + "\n", + "plt.xlim(min(jet_bins), max(jet_bins))\n", + "plt.savefig(\"{}/ak4_chs_jet_pt.pdf\".format(outpath))" ] }, { "cell_type": "code", "execution_count": null, - "id": "74b70d65-df2f-4d26-8a69-0d4870c0c4ba", + "id": "eee2941a-6378-416f-bcd8-efef266e6eaa", "metadata": {}, "outputs": [], "source": [ - "gen_pf_match = match_jets(gen_jets, pf_jets, 0.1)\n", - "# gen_mlpf_old_match = match_jets(gen_jets, mlpf_old_jets, 0.1)\n", - "gen_mlpf_new_match = match_jets(gen_jets, mlpf_new_jets, 0.1)" + "jet_bins_fat = varbins(np.linspace(100, 200, 5), np.linspace(200, 1000, 5))" ] }, { "cell_type": "code", "execution_count": null, - "id": "e9e7d34c-ea0d-4e21-92ef-3da5a87ea1bc", + "id": "575fa5e7-1cf3-48f7-9ecb-b7a651cec4f9", "metadata": {}, "outputs": [], "source": [ "f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={\"height_ratios\": [3, 1]}, sharex=True)\n", "\n", - "h0 = to_bh(awkward.flatten(gen_jets.pt), jet_bins)\n", - "h1 = to_bh(awkward.flatten(pf_jets.pt), jet_bins)\n", - "# h2 = to_bh(awkward.flatten(mlpf_old_jets.pt), jet_bins)\n", - "h3 = to_bh(awkward.flatten(mlpf_new_jets.pt), jet_bins)\n", + "h0 = to_bh(awkward.flatten(data_baseline[\"GenJetAK8_pt\"]), jet_bins_fat)\n", + "h1 = to_bh(awkward.flatten(data_baseline[\"FatJet_pt_raw\"]), jet_bins_fat)\n", + "h3 = to_bh(awkward.flatten(data_mlpf_new[\"FatJet_pt_raw\"]), jet_bins_fat)\n", "\n", "plt.sca(a0)\n", "x0 = mplhep.histplot(h0, histtype=\"step\", lw=2, label=\"gen\", binwnorm=1.0, ls=\"--\")\n", "x1 = mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\", binwnorm=1.0, ls=\"-\")\n", - "# x2 = mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF old\", binwnorm=1.0, ls=\"-\")\n", "x3 = mplhep.histplot(h3, histtype=\"step\", lw=2, label=\"MLPF\", binwnorm=1.0, ls=\"-\")\n", "\n", "# plt.xscale(\"log\")\n", "plt.yscale(\"log\")\n", "cms_label(a0)\n", "sample_label(a0, physics_process, x=0.01, y=0.9)\n", - "a0.text(0.01, 0.92, \"AK4 CHS jets\", transform=a0.transAxes)\n", + "a0.text(0.01, 0.92, \"AK8 jets\", transform=a0.transAxes)\n", "handles, labels = a0.get_legend_handles_labels()\n", "handles = [x0[0].stairs, x1[0].stairs, x3[0].stairs]\n", "a0.legend(handles, labels, loc=1)\n", - "plt.ylim(10, 10**6)\n", + "plt.ylim(1, 10**6)\n", "plt.ylabel(\"Number of jets / GeV\")\n", "\n", "plt.sca(a1)\n", @@ -346,8 +364,16 @@ "\n", "plt.xscale(\"log\")\n", "\n", - "plt.xlim(min(jet_bins), max(jet_bins))\n", - "plt.savefig(\"{}/ak4_chs_jet_pt.pdf\".format(outpath))" + "plt.xlim(min(jet_bins_fat), max(jet_bins_fat))\n", + "plt.savefig(\"{}/ak8_jet_pt.pdf\".format(outpath))" + ] + }, + { + "cell_type": "markdown", + "id": "258f3b5d-d72f-4b08-aae7-05ee6615d825", + "metadata": {}, + "source": [ + "### Jet response, matching to gen-jets" ] }, { @@ -366,9 +392,15 @@ "sample_label(ax, physics_process, x=0.02, y=0.92)\n", "ax.text(0.02, 0.94, \"AK4 CHS jets\", transform=ax.transAxes)\n", "\n", - "h0 = to_bh(gen_pf_match[\"pt_2\"] / gen_pf_match[\"pt_1\"], b)\n", - "h1 = to_bh(gen_mlpf_new_match[\"pt_2\"] / gen_mlpf_new_match[\"pt_1\"], b)\n", + "msk = data_baseline[\"JetCHS_genJetIdx\"]!=-1\n", + "jet_response_pf = awkward.flatten(data_baseline[\"JetCHS_pt_raw\"][msk] / data_baseline[\"GenJet_pt\"][data_baseline[\"JetCHS_genJetIdx\"]][msk])\n", + "msk = data_mlpf_new[\"JetCHS_genJetIdx\"]!=-1\n", + "jet_response_mlpf = awkward.flatten(data_mlpf_new[\"JetCHS_pt_raw\"][msk] / data_mlpf_new[\"GenJet_pt\"][data_mlpf_new[\"JetCHS_genJetIdx\"]][msk])\n", + "\n", + "h0 = to_bh(jet_response_pf, b)\n", + "h1 = to_bh(jet_response_mlpf, b)\n", "\n", + "plt.plot([], [])\n", "x0 = mplhep.histplot(h0, histtype=\"step\", lw=2, label=\"PF\");\n", "x1 = mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"MLPF\");\n", "\n", @@ -383,12 +415,37 @@ { "cell_type": "code", "execution_count": null, - "id": "3999d2c6-7083-48c7-9c3e-7d65741c742f", + "id": "e699ce65-17c1-4b19-93f4-7f893b16764f", "metadata": {}, "outputs": [], "source": [ - "import scipy\n", - "import scipy.stats" + "plt.figure()\n", + "ax = plt.axes()\n", + "\n", + "b = np.linspace(0,2,200)\n", + "\n", + "cms_label(ax)\n", + "sample_label(ax, physics_process, x=0.02, y=0.92)\n", + "ax.text(0.02, 0.94, \"AK8 jets\", transform=ax.transAxes)\n", + "\n", + "msk = data_baseline[\"FatJet_genJetAK8Idx\"]!=-1\n", + "jet_response_pf = awkward.flatten(data_baseline[\"FatJet_pt_raw\"][msk] / data_baseline[\"GenJetAK8_pt\"][data_baseline[\"FatJet_genJetAK8Idx\"]][msk])\n", + "msk = data_mlpf_new[\"FatJet_genJetAK8Idx\"]!=-1\n", + "jet_response_mlpf = awkward.flatten(data_mlpf_new[\"FatJet_pt_raw\"][msk] / data_mlpf_new[\"GenJetAK8_pt\"][data_mlpf_new[\"FatJet_genJetAK8Idx\"]][msk])\n", + "\n", + "h0 = to_bh(jet_response_pf, b)\n", + "h1 = to_bh(jet_response_mlpf, b)\n", + "\n", + "plt.plot([], [])\n", + "x0 = mplhep.histplot(h0, histtype=\"step\", lw=2, label=\"PF\");\n", + "x1 = mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"MLPF\");\n", + "\n", + "handles, labels = ax.get_legend_handles_labels()\n", + "handles = [x0[0].stairs, x1[0].stairs]\n", + "ax.legend(handles, labels, loc=1)\n", + "plt.xlabel(\"Matched reco / gen jet $p_T$\")\n", + "plt.ylabel(\"Matched jets / bin\")\n", + "plt.savefig(\"{}/ak8_jet_pt_ratio.pdf\".format(outpath))" ] }, { @@ -411,84 +468,85 @@ "metadata": {}, "outputs": [], "source": [ - "b = np.linspace(0, 2, 100)\n", - "\n", - "med_vals_pf = []\n", - "med_vals_mlpf_old = []\n", - "med_vals_mlpf_new = []\n", - "\n", - "iqr_vals_pf = []\n", - "iqr_vals_pf_low = []\n", - "iqr_vals_pf_high = []\n", - "\n", - "iqr_vals_mlpf_old = []\n", - "iqr_vals_mlpf_old_low = []\n", - "iqr_vals_mlpf_old_high = []\n", - "\n", - "iqr_vals_mlpf_new = []\n", - "iqr_vals_mlpf_new_low = []\n", - "iqr_vals_mlpf_new_high = []\n", - "\n", - "for ibin in range(len(jet_bins)-1):\n", - " min_pt = jet_bins[ibin]\n", - " max_pt = jet_bins[ibin+1]\n", - "\n", - " response_pf = (gen_pf_match[\"pt_2\"] / gen_pf_match[\"pt_1\"])[(gen_pf_match[\"pt_1\"]>=min_pt) & (gen_pf_match[\"pt_1\"]=min_pt) & (gen_mlpf_old_match[\"pt_1\"]=min_pt) & (gen_mlpf_new_match[\"pt_1\"]=min_pt) & (matched_gj=min_pt) & (all_gj_pt=min_pt) & (matched_gj=min_pt) & (all_gj_pt0:\n", + " plt.clf()\n", + " return (med_vals_pf, iqr_vals_pf, match_vals_pf), (med_vals_mlpf, iqr_vals_mlpf, match_vals_mlpf)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf1a75f1-0ead-48d0-a695-2d2bf1d2be9b", + "metadata": {}, + "outputs": [], + "source": [ + "stats_pf, stats_mlpf = get_response_in_bins()" ] }, { @@ -501,9 +559,8 @@ "fig = plt.figure()\n", "ax = plt.axes()\n", "plt.plot([], [])\n", - "plt.plot(jet_bins[:-1], np.array(med_vals_pf), marker=\"o\", label=\"PF\")\n", - "# plt.plot(jet_bins[:-1], np.array(med_vals_mlpf_old), marker=\"v\", label=\"MLPF old\")\n", - "plt.plot(jet_bins[:-1], np.array(med_vals_mlpf_new), marker=\"^\", label=\"MLPF\")\n", + "plt.plot(jet_bins[:-1], np.array(stats_pf[0]), marker=\"o\", label=\"PF\")\n", + "plt.plot(jet_bins[:-1], np.array(stats_mlpf[0]), marker=\"^\", label=\"MLPF\")\n", "plt.xscale(\"log\")\n", "plt.legend()\n", "plt.ylabel(\"jet $p_T$ response median\")\n", @@ -526,35 +583,15 @@ "ax = plt.axes()\n", "plt.plot([], [])\n", "\n", - "p0 = plt.plot(\n", + "plt.plot(\n", " jet_bins[:-1],\n", - " np.array(iqr_vals_pf)/np.array(med_vals_pf),\n", - " label=\"PF\")\n", - "plt.fill_between(jet_bins[:-1],\n", - " np.array(iqr_vals_pf_low)/np.array(med_vals_pf),\n", - " np.array(iqr_vals_pf_high)/np.array(med_vals_pf),\n", - " color=p0[0].get_color(), alpha=0.5\n", - ")\n", - "\n", - "# p0 = plt.plot(\n", - "# jet_bins[:-1],\n", - "# np.array(iqr_vals_mlpf_old)/np.array(med_vals_mlpf_old),\n", - "# label=\"MLPF old\")\n", - "# plt.fill_between(jet_bins[:-1],\n", - "# np.array(iqr_vals_mlpf_old_low)/np.array(med_vals_mlpf_old),\n", - "# np.array(iqr_vals_mlpf_old_high)/np.array(med_vals_mlpf_old),\n", - "# color=p0[0].get_color(), alpha=0.5\n", - "# )\n", - "\n", - "p0 = plt.plot(\n", + " np.array(stats_pf[1])/np.array(stats_pf[0]),\n", + " label=\"PF\", marker=\"o\")\n", + "\n", + "plt.plot(\n", " jet_bins[:-1],\n", - " np.array(iqr_vals_mlpf_new)/np.array(med_vals_mlpf_new),\n", - " label=\"MLPF\")\n", - "plt.fill_between(jet_bins[:-1],\n", - " np.array(iqr_vals_mlpf_new_low)/np.array(med_vals_mlpf_new),\n", - " np.array(iqr_vals_mlpf_new_high)/np.array(med_vals_mlpf_new),\n", - " color=p0[0].get_color(), alpha=0.5\n", - ")\n", + " np.array(stats_mlpf[1])/np.array(stats_mlpf[0]),\n", + " label=\"MLPF\", marker=\"^\")\n", "\n", "plt.xscale(\"log\")\n", "cms_label(ax)\n", @@ -562,10 +599,157 @@ "plt.legend()\n", "plt.ylabel(\"jet $p_T$ response IQR / median\")\n", "ax.text(0.01, 0.95, \"AK4 CHS jets\", transform=ax.transAxes)\n", - "plt.ylim(0,0.5)\n", + "plt.ylim(0, 1.0)\n", "plt.savefig(\"{}/ak4_chs_jet_response_iqr_over_median.pdf\".format(outpath))" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "906678ca-6662-4980-add4-22f31d7f3efd", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "ax = plt.axes()\n", + "plt.plot([], [])\n", + "\n", + "plt.plot(\n", + " jet_bins[:-1],\n", + " np.array(stats_pf[2]),\n", + " label=\"PF\", marker=\"o\")\n", + "\n", + "plt.plot(\n", + " jet_bins[:-1],\n", + " np.array(stats_mlpf[2]),\n", + " label=\"MLPF\", marker=\"^\")\n", + "\n", + "plt.xscale(\"log\")\n", + "cms_label(ax)\n", + "sample_label(ax, physics_process, x=0.01, y=0.94)\n", + "plt.legend()\n", + "plt.ylabel(\"fraction of jets matched to gen\")\n", + "ax.text(0.01, 0.95, \"AK4 CHS jets\", transform=ax.transAxes)\n", + "plt.ylim(0.8, 1.2)\n", + "plt.savefig(\"{}/ak4_chs_jet_match_frac.pdf\".format(outpath))" + ] + }, + { + "cell_type": "markdown", + "id": "b62a5778-c90e-4b93-ba78-81f97d058080", + "metadata": {}, + "source": [ + "## Fat jets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e73d62ae-991e-46cc-8371-e0ade799b981", + "metadata": {}, + "outputs": [], + "source": [ + "stats_pf, stats_mlpf = get_response_in_bins(\n", + " label_gjidx=\"FatJet_genJetAK8Idx\", label_rjpt=\"FatJet_pt_raw\", label_gjpt=\"GenJetAK8_pt\",\n", + " jet_bins=jet_bins_fat,\n", + " fn=\"ak8\",\n", + " title=\"AK8\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e819542a-6ee9-4b8d-a759-cd2ffffb8aca", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "ax = plt.axes()\n", + "plt.plot([], [])\n", + "plt.plot(jet_bins_fat[:-1], np.array(stats_pf[0]), marker=\"o\", label=\"PF\")\n", + "plt.plot(jet_bins_fat[:-1], np.array(stats_mlpf[0]), marker=\"^\", label=\"MLPF\")\n", + "plt.xscale(\"log\")\n", + "plt.legend()\n", + "plt.ylabel(\"jet $p_T$ response median\")\n", + "cms_label(ax)\n", + "sample_label(ax, physics_process, x=0.01, y=0.94)\n", + "ax.text(0.01, 0.95, \"AK8 jets\", transform=ax.transAxes)\n", + "plt.axhline(1.0, color=\"black\", ls=\"--\")\n", + "plt.ylim(0.5, 1.5)\n", + "plt.savefig(\"{}/ak8_jet_response_median.pdf\".format(outpath))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "123e3e5b-82ce-4425-b317-2b99387233ea", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "ax = plt.axes()\n", + "plt.plot([], [])\n", + "\n", + "plt.plot(\n", + " jet_bins_fat[:-1],\n", + " np.array(stats_pf[1])/np.array(stats_pf[0]),\n", + " label=\"PF\", marker=\"o\")\n", + "\n", + "plt.plot(\n", + " jet_bins_fat[:-1],\n", + " np.array(stats_mlpf[1])/np.array(stats_mlpf[0]),\n", + " label=\"MLPF\", marker=\"^\")\n", + "\n", + "plt.xscale(\"log\")\n", + "cms_label(ax)\n", + "sample_label(ax, physics_process, x=0.01, y=0.94)\n", + "plt.legend()\n", + "plt.ylabel(\"jet $p_T$ response IQR / median\")\n", + "ax.text(0.01, 0.95, \"AK8 jets\", transform=ax.transAxes)\n", + "plt.ylim(0, 0.4)\n", + "plt.savefig(\"{}/ak8_jet_response_iqr_over_median.pdf\".format(outpath))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f47e86fa-3ba0-4551-8611-f73e03a7e208", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure()\n", + "ax = plt.axes()\n", + "plt.plot([], [])\n", + "\n", + "plt.plot(\n", + " jet_bins_fat[:-1],\n", + " np.array(stats_pf[2]),\n", + " label=\"PF\", marker=\"o\")\n", + "\n", + "plt.plot(\n", + " jet_bins_fat[:-1],\n", + " np.array(stats_mlpf[2]),\n", + " label=\"MLPF\", marker=\"^\")\n", + "\n", + "plt.xscale(\"log\")\n", + "cms_label(ax)\n", + "sample_label(ax, physics_process, x=0.01, y=0.94)\n", + "plt.legend()\n", + "plt.ylabel(\"fraction of jets matched to gen\")\n", + "ax.text(0.01, 0.95, \"AK8 jets\", transform=ax.transAxes)\n", + "plt.ylim(0, 2)\n", + "plt.savefig(\"{}/ak8_jet_match_frac.pdf\".format(outpath))" + ] + }, + { + "cell_type": "markdown", + "id": "3bf9cd3c-d88b-491d-8e10-b78f6f4813d8", + "metadata": {}, + "source": [ + "# MET" + ] + }, { "cell_type": "code", "execution_count": null, @@ -575,15 +759,13 @@ "source": [ "f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={\"height_ratios\": [3, 1]}, sharex=True)\n", "\n", - "h0 = to_bh(gen_met_pt, met_bins)\n", - "h1 = to_bh(pf_met_pt, met_bins)\n", - "# h2 = to_bh(mlpf_old_met_pt, met_bins)\n", - "h3 = to_bh(mlpf_new_met_pt, met_bins)\n", + "h0 = to_bh(data_baseline[\"GenMET_pt\"], met_bins)\n", + "h1 = to_bh(data_baseline[\"RawPFMET_pt\"], met_bins)\n", + "h3 = to_bh(data_mlpf_new[\"RawPFMET_pt\"], met_bins)\n", "\n", "plt.sca(a0)\n", "x0 = mplhep.histplot(h0, histtype=\"step\", lw=2, label=\"gen\", binwnorm=1.0, ls=\"--\")\n", "x1 = mplhep.histplot(h1, histtype=\"step\", lw=2, label=\"PF\", binwnorm=1.0, ls=\"-\")\n", - "# x2 = mplhep.histplot(h2, histtype=\"step\", lw=2, label=\"MLPF old\", binwnorm=1.0, ls=\"-\")\n", "x3 = mplhep.histplot(h3, histtype=\"step\", lw=2, label=\"MLPF\", binwnorm=1.0, ls=\"-\")\n", "\n", "# plt.xscale(\"log\")\n", @@ -600,7 +782,6 @@ "plt.sca(a1)\n", "mplhep.histplot(h0 / h0, histtype=\"step\", lw=2, ls=\"--\")\n", "mplhep.histplot(h1 / h0, histtype=\"step\", lw=2, ls=\"-\")\n", - "# mplhep.histplot(h2 / h0, histtype=\"step\", lw=2, ls=\"-\")\n", "mplhep.histplot(h3 / h0, histtype=\"step\", lw=2, ls=\"-\")\n", "if folder == \"QCD_PU\":\n", " plt.ylim(-5,5)\n", diff --git a/parameters/pytorch/pyg-clic.yaml b/parameters/pytorch/pyg-clic.yaml index a8416ecf3..083dd7610 100644 --- a/parameters/pytorch/pyg-clic.yaml +++ b/parameters/pytorch/pyg-clic.yaml @@ -9,7 +9,7 @@ data_dir: gpus: 1 gpu_batch_multiplier: 1 load: -num_epochs: 30 +num_epochs: 10 patience: 20 lr: 0.0001 lr_schedule: cosinedecay # constant, cosinedecay, onecycle @@ -53,12 +53,12 @@ model: attention: conv_type: attention - num_convs: 6 - dropout_ff: 0.1 + num_convs: 3 + dropout_ff: 0.0 dropout_conv_id_mha: 0.0 dropout_conv_id_ff: 0.0 - dropout_conv_reg_mha: 0.1 - dropout_conv_reg_ff: 0.1 + dropout_conv_reg_mha: 0.0 + dropout_conv_reg_ff: 0.0 activation: "relu" head_dim: 32 num_heads: 32 diff --git a/parameters/pytorch/pyg-cms.yaml b/parameters/pytorch/pyg-cms.yaml index 9b5e2cd9e..ecc34ae63 100644 --- a/parameters/pytorch/pyg-cms.yaml +++ b/parameters/pytorch/pyg-cms.yaml @@ -9,7 +9,7 @@ data_dir: gpus: 1 gpu_batch_multiplier: 1 load: -num_epochs: 100 +num_epochs: 5 patience: 20 lr: 0.0001 lr_schedule: cosinedecay # constant, cosinedecay, onecycle @@ -58,15 +58,15 @@ model: attention: conv_type: attention - num_convs: 6 + num_convs: 3 dropout_ff: 0.0 dropout_conv_id_mha: 0.0 dropout_conv_id_ff: 0.0 dropout_conv_reg_mha: 0.0 dropout_conv_reg_ff: 0.0 activation: "relu" - head_dim: 32 - num_heads: 32 + head_dim: 16 + num_heads: 16 attention_type: flash use_pre_layernorm: True diff --git a/scripts/cmssw/qcd_nopu.txt b/scripts/cmssw/qcd_nopu.txt new file mode 100644 index 000000000..f813a7081 --- /dev/null +++ b/scripts/cmssw/qcd_nopu.txt @@ -0,0 +1,50 @@ +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/043d60cb-4a53-474f-a337-3850bd9407a3.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/047c610c-5164-47c4-b646-f8863ff2a88c.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/0594fbad-022c-4e21-b0af-66d17e0057d6.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/1a170a11-8d3c-4151-b9b2-728bfed4c1e2.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/218dcedf-d812-4cb9-843a-92c6d55f5b69.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/2190e0a2-936a-4ac6-9b50-c80fbd1bf894.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/2b8371c2-987a-4c5f-ac5b-30a5f44db100.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/2de75f89-2806-4fba-8ecf-ab3e40516429.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/33ec5700-a784-4637-8586-697b2d73b35c.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/4c0cbfa2-66e8-4368-a7ee-5386ba1df4ee.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/4f2100c9-9a21-4204-ba7f-ee22c4029856.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/5b467708-f3cf-4c5f-8510-bd767535d9a7.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/5f7f76b0-0ae3-48d3-a7dd-e5d92ce6a57d.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/604f27c8-a973-4a6a-8e87-79fec558a9b1.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/69ab0ea8-48e2-4e8a-91eb-5c8b928e4534.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/6fafd372-f676-4357-8198-f9801c2d5d46.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/7055f425-50e0-4cb5-b34b-458c6952d24b.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/7d10dc70-41fc-47f6-8c06-4bad657409da.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/80246861-91ac-4003-9332-98e9cf6226eb.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/893efa7a-6402-49e2-9059-48b8e4fdf2a7.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/8aa0a4f2-5159-4474-81a6-34977ef50301.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/8ea8006c-4931-4890-b4e1-3b66a7dade20.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/98222170-f341-4887-91bf-99249fcbc42f.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/99c2c538-92d9-4089-b35c-9a2c9cc1a98b.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/a0b7746f-0356-42d6-9988-c901f4708487.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/a305cea8-9fc1-4dd8-8e09-f0f6e0666a93.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/a8ceb898-28b1-4e56-8f35-db086e8e0133.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/aa3349e4-713f-4bec-bb2b-d7af4d58160e.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/b0c24045-7df6-438b-9d15-5de4e3060006.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/b25d319a-b0c9-4d75-a019-6f06f00f26f5.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/b3188b65-6f13-4982-95b4-60b9008d4c2c.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/b9c53d33-723d-4912-8d80-b2d7cd19c640.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/bf579321-14ed-47cc-862d-029fe081e169.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/c1ebf41a-13b6-441d-955e-4a17ad490394.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/cbbbddda-dc84-49cd-8650-6f8ef86c64d3.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/cd2689a7-4ca3-4cd7-8f52-640d1f03ce53.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/d15e9b7f-77b9-4724-b0d1-8ce8b50aa9cb.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/d43e2a1a-e809-4da8-b230-668adab5096a.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/d6e45f5c-7f6e-4f79-83de-4dd06e6cdc20.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/db1fcae1-6dcc-4eef-820f-4fa2d7ee09c1.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/db8f53e6-139a-485d-9345-433eeb23d55f.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/dd4ba16a-be62-4dd8-b8be-54f1ab59fb93.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/de42e7df-3f3e-457b-9c5f-d3ed018932c5.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/de46e07e-1b11-42df-ab24-f94cbd30bfcc.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/df0389e4-31d7-4eac-aaa3-9b127d289f03.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/dfba3635-a192-445b-b581-4dbf5c44b3b2.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/e9d32e56-f25d-4092-89c4-f504b40d9df8.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/ed73d282-928f-491e-9e1d-27c455e8c0d8.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/f03d23be-9e6c-4539-81e0-1ad766e52a1f.root +/store/relval/CMSSW_14_1_0/RelValQCD_FlatPt_15_3000HS_14/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/fb8642b2-332b-44b2-af82-2763b85ad3db.root diff --git a/scripts/cmssw/singleele.txt b/scripts/cmssw/singleele.txt new file mode 100644 index 000000000..0ae86341f --- /dev/null +++ b/scripts/cmssw/singleele.txt @@ -0,0 +1,2 @@ +/store/relval/CMSSW_14_1_0/RelValSingleEFlatPt2To100/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/747109d6-bbf1-41f6-abb2-b48c85183a41.root +/store/relval/CMSSW_14_1_0/RelValSingleEFlatPt2To100/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/a3cfea43-49f8-4e18-81f7-56f8ec9e67db.root diff --git a/scripts/cmssw/singlegamma.txt b/scripts/cmssw/singlegamma.txt new file mode 100644 index 000000000..3fdd3b263 --- /dev/null +++ b/scripts/cmssw/singlegamma.txt @@ -0,0 +1,2 @@ +/store/relval/CMSSW_14_1_0/RelValSingleGammaFlatPt8To150/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/09e9fbb5-17b4-4f35-83ee-f03f7048da63.root +/store/relval/CMSSW_14_1_0/RelValSingleGammaFlatPt8To150/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/53631e7e-00a0-4171-a1d5-9e25d0070242.root diff --git a/scripts/cmssw/singlepi.txt b/scripts/cmssw/singlepi.txt new file mode 100644 index 000000000..af1130511 --- /dev/null +++ b/scripts/cmssw/singlepi.txt @@ -0,0 +1,9 @@ +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/33f44d04-952f-43be-b8dd-77cc7663e109.root +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/4b7014b7-6b41-49bd-81e5-6b637e80c32a.root +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/53d906b2-9e8d-4b77-afb9-ccf1ecf75e37.root +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/9a170fc3-fd73-49df-871c-df3c375b3d96.root +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/ab25403a-89b3-495f-ab99-b926e8f09a51.root +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/ab7b36f0-80ab-4c1d-a30b-47bf3b9232db.root +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/b2127dc6-0d24-42cd-97b0-032ac103410f.root +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/c2fbefce-4b5b-49a3-8169-4a51ed2807c9.root +/store/relval/CMSSW_14_1_0_pre3/RelValSinglePiFlatPt0p7To10/GEN-SIM-DIGI-RAW/PU_140X_mcRun3_2024_realistic_v8_STD_2024_PU-v2/2590000/c8c2448d-a832-4b2f-ae19-0c5e11afb8d5.root diff --git a/scripts/cmssw/ttbar_nopu.txt b/scripts/cmssw/ttbar_nopu.txt new file mode 100644 index 000000000..d86cd8403 --- /dev/null +++ b/scripts/cmssw/ttbar_nopu.txt @@ -0,0 +1,9 @@ +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/0c0bd756-c218-4400-a3d2-35c811ba6006.root +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/57164fec-b73e-486c-b49d-17e33406b276.root +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/834295e5-9cae-4a48-b001-94927405573e.root +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/94781e16-2b2e-4104-81e1-c6304c8ce57e.root +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/bc1fed68-5402-4db4-8e7b-43da703bd5c3.root +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/cb5d91b7-e45d-4118-9dc2-79cc8fdea97f.root +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/d12256b4-ac8a-45b1-ab29-2e7d7c934bb3.root +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/eec38c20-57a6-47bd-a6fb-b0fe7d62d55b.root +/store/relval/CMSSW_14_1_0/RelValTTbar_14TeV/GEN-SIM-DIGI-RAW/140X_mcRun3_2024_realistic_v21_STD_Recycled_2024_noPU-v2/2580000/f2874643-11e5-4bf4-be98-6274da47a6ba.root diff --git a/scripts/cmssw/validation_job.sh b/scripts/cmssw/validation_job.sh index 01ccb3ff6..c535e9b81 100755 --- a/scripts/cmssw/validation_job.sh +++ b/scripts/cmssw/validation_job.sh @@ -11,12 +11,12 @@ OUTDIR=$CMSSW_BASE/out/ WORKDIR=$CMSSW_BASE/work_${SAMPLE}_${JOBTYPE}_${NJOB} # uncomment the following when running at T2_EE_Estonia -# source /cvmfs/cms.cern.ch/cmsset_default.sh -# cd /scratch/persistent/joosep/CMSSW_14_1_0 -# eval `scram runtime -sh` -# cd $PREVDIR -# export OUTDIR=/local/joosep/mlpf/results/cms/${CMSSW_VERSION}_af9b3f/ -# export WORKDIR=/scratch/local/$USER/${SLURM_JOB_ID} +source /cvmfs/cms.cern.ch/cmsset_default.sh +cd /scratch/persistent/joosep/CMSSW_14_1_0 +eval `scram runtime -sh` +cd $PREVDIR +export OUTDIR=/local/joosep/mlpf/results/cms/${CMSSW_VERSION}_74d149_btvnano/ +export WORKDIR=/scratch/local/$USER/${SLURM_JOB_ID} #abort on error, print all commands set -e @@ -45,26 +45,31 @@ elif [ $JOBTYPE == "pf" ]; then --filein $FILENAME --fileout file:step3.root fi +#BTV/PF NANO recipe +cmsDriver.py step3_btv -s NANO:@BTV --mc --conditions $CONDITIONS --era $ERA \ + --eventcontent NANOAODSIM --datatier NANOAODSIM \ + --customise_commands="process.add_(cms.Service('InitRootHandlers', EnableIMT = cms.untracked.bool(False)));process.MessageLogger.cerr.FwkReport.reportEvery=1000" \ + -n -1 --no_exec --filein file:step3_inMINIAODSIM.root --fileout file:step3_NANO_btv.root #JME NANO recipe -cmsDriver.py step3 -s NANO --mc --conditions $CONDITIONS --era $ERA \ +cmsDriver.py step3_jme -s NANO:@JME --mc --conditions $CONDITIONS --era $ERA \ --eventcontent NANOAODSIM --datatier NANOAODSIM \ --customise_commands="process.add_(cms.Service('InitRootHandlers', EnableIMT = cms.untracked.bool(False)));process.MessageLogger.cerr.FwkReport.reportEvery=1000" \ - -n -1 --no_exec --filein file:step3_inMINIAODSIM.root --fileout file:step3_NANO.root + -n -1 --no_exec --filein file:step3_inMINIAODSIM.root --fileout file:step3_NANO_jme.root -echo "from PhysicsTools.NanoAOD.custom_jme_cff import PrepJMECustomNanoAOD" >> step3_NANO.py -echo "process = PrepJMECustomNanoAOD(process)" >> step3_NANO.py -cmsRun step3_NANO.py +cmsRun step3_btv_NANO.py +cmsRun step3_jme_NANO.py ls *.root mkdir -p $OUTDIR/${SAMPLE}_${JOBTYPE} #convert CMSSW EDM to pkl for easy plotting -python3 $PREVDIR/mlpf/plotting/cms_fwlite.py step3_inMINIAODSIM.root step3.pkl +# python3 $PREVDIR/mlpf/plotting/cms_fwlite.py step3_inMINIAODSIM.root step3.pkl -cp step3.root $OUTDIR/${SAMPLE}_${JOBTYPE}/step3_RECO_${NJOB}.root +# cp step3.root $OUTDIR/${SAMPLE}_${JOBTYPE}/step3_RECO_${NJOB}.root cp step3_inMINIAODSIM.root $OUTDIR/${SAMPLE}_${JOBTYPE}/step3_MINI_${NJOB}.root -cp step3_NANO.root $OUTDIR/${SAMPLE}_${JOBTYPE}/step3_NANO_${NJOB}.root -cp step3.pkl $OUTDIR/${SAMPLE}_${JOBTYPE}/step3_MINI_${NJOB}.pkl +cp step3_NANO_btv.root $OUTDIR/${SAMPLE}_${JOBTYPE}/step3_NANO_btv_${NJOB}.root +cp step3_NANO_jme.root $OUTDIR/${SAMPLE}_${JOBTYPE}/step3_NANO_jme_${NJOB}.root +# cp step3.pkl $OUTDIR/${SAMPLE}_${JOBTYPE}/step3_MINI_${NJOB}.pkl rm -Rf $WORKDIR diff --git a/scripts/lumi/pytorch-clic-8.sh b/scripts/lumi/pytorch-clic-8.sh index c664fe71e..f94befe1b 100755 --- a/scripts/lumi/pytorch-clic-8.sh +++ b/scripts/lumi/pytorch-clic-8.sh @@ -6,7 +6,7 @@ #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 #SBATCH --mem=200G -#SBATCH --gpus-per-task=8 +#SBATCH --gpus-per-task=1 #SBATCH --partition=small-g #SBATCH --no-requeue #SBATCH -o logs/slurm-%x-%j-%N.out @@ -38,6 +38,6 @@ singularity exec \ -B /tmp \ --env LD_LIBRARY_PATH=/opt/rocm/lib/ \ --env CUDA_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES \ - $IMG python3 mlpf/pipeline.py --gpus 8 \ + $IMG python3 mlpf/pipeline.py --gpus 1 \ --data-dir $TFDS_DATA_DIR --config parameters/pytorch/pyg-clic.yaml \ - --train --gpu-batch-multiplier 128 --num-workers 8 --prefetch-factor 100 --checkpoint-freq 1 --conv-type attention --dtype bfloat16 --lr 0.0001 --num-epochs 50 + --train --gpu-batch-multiplier 128 --num-workers 8 --prefetch-factor 100 --checkpoint-freq 1 --conv-type attention --dtype bfloat16 --ntrain 10000 --ntest 1000 --nvalid 1000 diff --git a/scripts/lumi/pytorch-cms-8.sh b/scripts/lumi/pytorch-cms-8.sh index f69c52414..0f820b800 100755 --- a/scripts/lumi/pytorch-cms-8.sh +++ b/scripts/lumi/pytorch-cms-8.sh @@ -5,8 +5,8 @@ #SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 #SBATCH --cpus-per-task=32 -#SBATCH --mem=400G -#SBATCH --gpus-per-task=8 +#SBATCH --mem=200G +#SBATCH --gpus-per-task=1 #SBATCH --partition=small-g #SBATCH --no-requeue #SBATCH -o logs/slurm-%x-%j-%N.out @@ -40,4 +40,4 @@ singularity exec \ --env CUDA_VISIBLE_DEVICES=$ROCR_VISIBLE_DEVICES \ $IMG python3 mlpf/pipeline.py --gpus 8 \ --data-dir $TFDS_DATA_DIR --config parameters/pytorch/pyg-cms.yaml \ - --train --gpu-batch-multiplier 5 --num-workers 8 --prefetch-factor 50 --checkpoint-freq 1 --conv-type attention --dtype bfloat16 --lr 0.0001 + --train --gpu-batch-multiplier 5 --num-workers 8 --prefetch-factor 50 --checkpoint-freq 1 --conv-type attention --dtype bfloat16 diff --git a/scripts/tallinn/a100-mig/pytorch-small-eval-cms.sh b/scripts/tallinn/a100-mig/pytorch-small-eval-cms.sh index acbcdac49..b61337e27 100755 --- a/scripts/tallinn/a100-mig/pytorch-small-eval-cms.sh +++ b/scripts/tallinn/a100-mig/pytorch-small-eval-cms.sh @@ -1,7 +1,7 @@ #!/bin/bash #SBATCH --partition gpu #SBATCH --gres gpu:mig:1 -#SBATCH --mem-per-gpu 200G +#SBATCH --mem-per-gpu 100G #SBATCH -o logs/slurm-%x-%j-%N.out IMG=/home/software/singularity/pytorch.simg:2024-08-18 @@ -15,4 +15,4 @@ singularity exec -B /local -B /scratch/persistent --nv \ --env KERAS_BACKEND=torch \ $IMG python mlpf/pipeline.py --gpus 1 \ --data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \ - --test --make-plots --gpu-batch-multiplier 2 --load $WEIGHTS --ntest 10000 --dtype bfloat16 --num-workers 8 --prefetch-factor 10 --test-datasets $DATASET + --test --make-plots --gpu-batch-multiplier 2 --load $WEIGHTS --ntest 10000 --dtype bfloat16 --num-workers 1 --prefetch-factor 10 --test-datasets $DATASET diff --git a/scripts/tallinn/a100/pytorch-clic.sh b/scripts/tallinn/a100/pytorch-clic.sh index 61cca7f9b..5ffcba5f8 100755 --- a/scripts/tallinn/a100/pytorch-clic.sh +++ b/scripts/tallinn/a100/pytorch-clic.sh @@ -1,16 +1,17 @@ #!/bin/bash #SBATCH --partition gpu #SBATCH --gres gpu:a100:1 -#SBATCH --mem-per-gpu 50G +#SBATCH --mem-per-gpu 250G #SBATCH -o logs/slurm-%x-%j-%N.out -IMG=/home/software/singularity/pytorch.simg:2024-08-18 +IMG=/home/software/singularity/pytorch.simg:2024-12-03 cd ~/particleflow -ulimit -n 10000 +ulimit -n 100000 singularity exec -B /scratch/persistent --nv \ --env PYTHONPATH=`pwd` \ --env KERAS_BACKEND=torch \ $IMG python3 mlpf/pipeline.py --gpus 1 \ --data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-clic.yaml \ - --train --test --make-plots --conv-type attention --num-epochs 20 --gpu-batch-multiplier 256 --num-workers 4 --prefetch-factor 100 --checkpoint-freq 1 --comet --attention-type math --dtype bfloat16 + --train --conv-type attention \ + --gpu-batch-multiplier 256 --checkpoint-freq 1 --num-workers 8 --prefetch-factor 100 --comet --ntest 2000 --test-datasets clic_edm_qq_pf diff --git a/scripts/tallinn/a100/pytorch.sh b/scripts/tallinn/a100/pytorch.sh index b78e1e2d9..a3a1ac55f 100755 --- a/scripts/tallinn/a100/pytorch.sh +++ b/scripts/tallinn/a100/pytorch.sh @@ -1,7 +1,7 @@ #!/bin/bash #SBATCH --partition gpu #SBATCH --gres gpu:a100:1 -#SBATCH --mem-per-gpu 250G +#SBATCH --mem-per-gpu 300G #SBATCH -o logs/slurm-%x-%j-%N.out IMG=/home/software/singularity/pytorch.simg:2024-12-03 @@ -14,4 +14,4 @@ singularity exec -B /scratch/persistent --nv \ $IMG python3 mlpf/pipeline.py --gpus 1 \ --data-dir /scratch/persistent/joosep/tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \ --train --conv-type attention \ - --gpu-batch-multiplier 5 --checkpoint-freq 1 --num-workers 8 --prefetch-factor 50 --comet --num-epochs 10 + --gpu-batch-multiplier 5 --checkpoint-freq 1 --num-workers 8 --prefetch-factor 50 --comet --ntest 1000 --test-datasets cms_pf_qcd_nopu diff --git a/scripts/tallinn/deploy_cms.sh b/scripts/tallinn/deploy_cms.sh index 234e39775..0e60e953e 100755 --- a/scripts/tallinn/deploy_cms.sh +++ b/scripts/tallinn/deploy_cms.sh @@ -3,8 +3,8 @@ rm -f files_to_copy.txt maxfiles=100 -path=/local/joosep/mlpf/results/cms/pyg-cms_20241101_090645_682892/./preds_checkpoint-18-2.778778 -targetpath=/home/joosep/huggingface/particleflow/cms/v2.1.0/pyg-cms_20241101_090645_682892/ +path=/local/joosep/mlpf/results/cms/pyg-cms_20241212_101648_120237/./preds_checkpoint-05-3.498507 +targetpath=/scratch/persistent/joosep/huggingface/particleflow/cms/v2.2.0/pyg-cms_20241212_101648_120237/ samplestocopy=( "cms_pf_qcd" diff --git a/scripts/tallinn/validate_cms_baseline.sh b/scripts/tallinn/validate_cms_baseline.sh deleted file mode 100755 index 25b1090d6..000000000 --- a/scripts/tallinn/validate_cms_baseline.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -#SBATCH -p main -#SBATCH --mem-per-cpu=7G -#SBATCH --cpus-per-task=1 -#SBATCH -o logs/slurm-%x-%j-%N.out - -NJOB=$1 - -#change this as needed -OUTDIR=/home/joosep/particleflow/data - -INPUT_FILELIST=$CMSSW_BASE/src/Validation/RecoParticleFlow/test/tmp/das_cache/QCD_PU.txt - -set -e -set -v -source /cvmfs/cms.cern.ch/cmsset_default.sh -source /cvmfs/grid.cern.ch/c7ui-test/etc/profile.d/setup-c7-ui-example.sh - -cd $CMSSW_BASE - -eval `scramv1 runtime -sh` - -CONDITIONS=auto:phase1_2021_realistic ERA=Run3 GEOM=DB.Extended CUSTOM= -FILENAME=`sed -n "${NJOB}p" $INPUT_FILELIST` -NTHREADS=1 - -WORKDIR=/scratch/$USER/job_${SLURM_JOB_ID}_${NJOB} -mkdir -p $WORKDIR -cd $WORKDIR - -cmsDriver.py step3 --conditions $CONDITIONS -s RAW2DIGI,L1Reco,RECO,RECOSIM,PAT,VALIDATION:@standardValidation+@miniAODValidation,DQM:@standardDQM+@ExtraHLT+@miniAODDQM+@nanoAODDQM --datatier RECOSIM,MINIAODSIM,DQMIO --nThreads 1 -n -1 --era $ERA --eventcontent RECOSIM,MINIAODSIM,DQM --geometry=$GEOM --filein $FILENAME --fileout file:step3.root -ls *.root - -mkdir -p $OUTDIR/QCDPU_baseline/ -cp step3_inMINIAODSIM.root $OUTDIR/QCDPU_baseline/step3_MINI_${NJOB}.root - -rm -Rf $WORKDIR diff --git a/scripts/timing.sh b/scripts/timing.sh index d243a0d4d..5b06e9775 100755 --- a/scripts/timing.sh +++ b/scripts/timing.sh @@ -1,12 +1,18 @@ #!/bin/bash +#SBATCH --partition gpu +#SBATCH --gres gpu:mig:1 +#SBATCH --mem-per-gpu 50G +#SBATCH -o logs/slurm-%x-%j-%N.out IMG=/home/software/singularity/pytorch.simg:2024-12-03 MODELS=onnxmodels -CMD_GPU="singularity exec --nv --env CUDA_VISIBLE_DEVICES=0 --env PYTHONPATH=/opt/onnxruntime-gpu/lib/python3.11/site-packages $IMG python3.11 mlpf/timing.py --execution-provider CUDAExecutionProvider" +CMD_GPU="singularity exec --nv --env PYTHONPATH=/opt/onnxruntime-gpu/lib/python3.11/site-packages $IMG python3.11 mlpf/timing.py --execution-provider CUDAExecutionProvider" $CMD_GPU --model $MODELS/test_fp32_unfused.onnx --num-threads 1 | tee timing/gpu_fp32_unfused.txt $CMD_GPU --model $MODELS/test_fp32_fused.onnx --num-threads 1 | tee timing/gpu_fp32_fused.txt +$CMD_GPU --model $MODELS/test_fp32_fused.onnx --num-threads 1 --batch-size 2 | tee timing/gpu_fp32_fused_bs2.txt +$CMD_GPU --model $MODELS/test_fp32_fused.onnx --num-threads 1 --batch-size 4 | tee timing/gpu_fp32_fused_bs4.txt -CMD_CPU="singularity exec $IMG python3.11 mlpf/timing.py --execution-provider CPUExecutionProvider" -$CMD_CPU --model $MODELS/test_fp32_unfused.onnx --num-threads 1 | tee timing/cpu_fp32_unfused.txt -$CMD_CPU --model $MODELS/test_fp32_fused.onnx --num-threads 1 | tee timing/cpu_fp32_fused.txt +# CMD_CPU="singularity exec $IMG python3.11 mlpf/timing.py --execution-provider CPUExecutionProvider" +# $CMD_CPU --model $MODELS/test_fp32_unfused.onnx --num-threads 1 | tee timing/cpu_fp32_unfused.txt +# $CMD_CPU --model $MODELS/test_fp32_fused.onnx --num-threads 1 | tee timing/cpu_fp32_fused.txt