diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_1024dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_1024dp.yaml new file mode 100644 index 0000000..da81147 --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_1024dp.yaml @@ -0,0 +1,85 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 1024 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 9 + eval_each_epochs: 4 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_128dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_128dp.yaml new file mode 100644 index 0000000..0708b6c --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_128dp.yaml @@ -0,0 +1,85 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 128 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 17 + eval_each_epochs: 8 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_16dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_16dp.yaml new file mode 100644 index 0000000..cca2666 --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_16dp.yaml @@ -0,0 +1,85 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 16 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 65 + eval_each_epochs: 32 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_256dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_256dp.yaml new file mode 100644 index 0000000..fd0e6a8 --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_256dp.yaml @@ -0,0 +1,85 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 256 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 17 + eval_each_epochs: 8 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_2dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_2dp.yaml new file mode 100644 index 0000000..78b8fce --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_2dp.yaml @@ -0,0 +1,87 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + # n_seeds_stage2: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 2 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 129 + eval_each_epochs: 64 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_32dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_32dp.yaml new file mode 100644 index 0000000..5c1df6d --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_32dp.yaml @@ -0,0 +1,85 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 32 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 33 + eval_each_epochs: 16 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_4dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_4dp.yaml new file mode 100644 index 0000000..02e197c --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_4dp.yaml @@ -0,0 +1,87 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + # n_seeds_stage2: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 4 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 129 + eval_each_epochs: 64 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_512dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_512dp.yaml new file mode 100644 index 0000000..9a036b8 --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_512dp.yaml @@ -0,0 +1,85 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 512 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 9 + eval_each_epochs: 4 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_64dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_64dp.yaml new file mode 100644 index 0000000..5e545c6 --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_64dp.yaml @@ -0,0 +1,85 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 64 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 33 + eval_each_epochs: 16 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file diff --git a/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_8dp.yaml b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_8dp.yaml new file mode 100644 index 0000000..3fe1efd --- /dev/null +++ b/configs/password_locked/sample_efficiency_unlock_all_locked/sft_lock_12500_ft_points_weak_unlocking_test/3stage_unlock_8dp.yaml @@ -0,0 +1,85 @@ +data_arguments: + dataset: "cvdb" + block_size: 24 + label_block_size: 4 + + +model_arguments: + seq2seq: False + max_new_tokens: 18 + # config_name: "gpt2" + # config_name: "t5-small" + config_name: "EleutherAI/pythia-70m" + # config_name: "EleutherAI/pythia-160m" + separate_token_per_var: False # only used for numeric experiments + + +training_arguments: + output_dir: 'experiments/temp' + bf16: True + per_device_train_batch_size: 512 + per_device_eval_batch_size: 2048 + optim: "adafactor" + # optim: "lion_32bit" + overwrite_output_dir: True + auto_find_batch_size: True + save_strategy: "no" + load_best_model_at_end: False + evaluation_strategy: 'epoch' + do_train: True + do_eval: True + do_sweeps: False + # n_sweeps: 5 + save_each_epochs: 0 + eval_each_epochs: 1 + eval_callback_type: "pipeline" # pipeline or generate + # weight_decay: 0.0001 + + +experiment_arguments: # common experiment arguments + define_experiment: False + numeric_experiment: True + name_prefix: "sample_efficiency_fromScratch12500_weakUnlock" + n_stages: 3 + n_seeds: 5 + start_seed: 1010 + slurm: True + n_gpu_hours: 3 + + +define_experiment_arguments: + def_order: "tve" + + +numeric_experiment_arguments: + # Args for pwd composition experiment below + pwd_locked_experiment: True + n_datapoints: 200000 + max_unlocking_datapoints: 8 + max_x: 10 + n_func_in_chain: 2 + fn_input_len: 4 + nfunc: 32 + n_fns_to_lock: 16 + n_fns_to_unlock: 16 + + +# overrides specified parameters +first_stage_arguments: + train_subset: 'stage1' + num_train_epochs: 4 + gradient_accumulation_steps: 1 + +second_stage_arguments: + train_subset: 'stage2' + num_train_epochs: 1 + gradient_accumulation_steps: 1 + n_datapoints: 12500 + +third_stage_arguments: + train_subset: 'stage3' + num_train_epochs: 65 + eval_each_epochs: 32 + gradient_accumulation_steps: 1 + dont_save_in_the_end: True + save_each_epochs: 0 \ No newline at end of file