Skip to content

Commit

Permalink
Update gradient alignment configs
Browse files Browse the repository at this point in the history
  • Loading branch information
egorkrash committed Apr 14, 2024
1 parent 72611dc commit 0803c50
Show file tree
Hide file tree
Showing 5 changed files with 334 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ experiment_arguments: # common experiment arguments
numeric_experiment: False
name_prefix: "all_grad_alignment_experiment_1stage_d1consd2cons"
n_stages: 1
n_seeds: 10
n_seeds: 15
n_seeds_stage2: 1
start_seed: 900
start_seed: 910
slurm: True
n_gpu_hours: 36

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Single-stage gradient-alignment experiment config (file added in this commit).
# This variant tracks gradient alignment for the qd1consis / qd2incons subsets
# (see grad_keys and name_prefix below); the sibling configs in the same commit
# track d1consis / d2consis instead.
# NOTE(review): leading indentation looks stripped by the page scrape — these
# option lines are presumably nested under their section keys in the real file;
# verify against the repository before reusing this text verbatim.
# NOTE(review): True/False are YAML 1.1-style booleans and save_strategy: "no"
# is a quoted string — presumably the consumer (HF TrainingArguments-style
# parsing) handles both; confirm the loader.
data_arguments:
dataset: "cvdb"
block_size: 48
label_block_size: 8
train_subset: 'full'
num_ents: 4000

# Fractions of examples assigned to each question/definition subset.
# The eight values sum to exactly 1.0.
frac_n_qd1consis: 0.25
frac_n_qd1incons: 0.0
frac_n_qd2incons: 0.25
frac_n_q: 0.1
frac_n_d1consis: 0.1
frac_n_d2consis: 0.1
frac_n_no_qd_baseline: 0.08
frac_n_q_no_replacement_baseline: 0.12


model_arguments:
seq2seq: False
max_new_tokens: 8
# Active model; the commented alternatives below are smaller models
# presumably used for cheaper debugging runs.
model_name_or_path: "EleutherAI/pythia-1b-deduped"
#model_name_or_path: "EleutherAI/pythia-410m-deduped"
# model_name_or_path: "gpt2"
# model_name_or_path: "EleutherAI/pythia-160m-deduped"


training_arguments:
output_dir: 'experiments/temp'
bf16: True
per_device_train_batch_size: 256
per_device_eval_batch_size: 256
num_train_epochs: 10
optim: "adafactor"
overwrite_output_dir: True
auto_find_batch_size: False
save_strategy: "no"
load_best_model_at_end: False
evaluation_strategy: 'epoch'

do_train: True
do_eval: True
do_sweeps: False
save_each_epochs: 0
# eval_each_epochs differs across the sibling configs in this commit
# (15 here vs 2/2/5 in the d1consd2cons variants) — presumably matched
# to each run's stage-1 epoch count; TODO confirm.
eval_each_epochs: 15
eval_callback_type: "pipeline" # pipeline or generate
calculate_grad_variance: True
# Subset keys whose gradients are tracked/compared for the alignment analysis.
grad_keys: "train_defs_qd1consis,train_defs_qd2incons,qd1consis,qd2incons"

experiment_arguments: # common experiment arguments
define_experiment: True
numeric_experiment: False
name_prefix: "all_grad_alignment_experiment_1stage_qd1consqd2incons"
n_stages: 1
n_seeds: 5
n_seeds_stage2: 1
start_seed: 900
slurm: True
n_gpu_hours: 36


define_experiment_arguments:
def_order: "tve"
entity_association_test_sets: True


numeric_experiment_arguments:
modular_experiment_baseline: False
modular_experiment: False
num_choice_experiment: False


# overrides specified parameters
first_stage_arguments:
train_subset: 'stage1'
num_train_epochs: 151
gradient_accumulation_steps: 64

# NOTE(review): n_stages is 1 above, so these stage-2 overrides are
# presumably unused in this run — confirm before relying on them.
second_stage_arguments:
train_subset: 'stage2'
num_train_epochs: 10
gradient_accumulation_steps: 16
dont_save_in_the_end: True
save_each_epochs: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Single-stage gradient-alignment experiment config (file added in this commit).
# This variant tracks the d1consis / d2consis subsets. Among the three
# d1consd2cons siblings in this commit, the files differ only in
# eval_each_epochs, stage-1 num_train_epochs, and gradient_accumulation_steps
# (here: 2 / 41 / 8) — apparently a sweep over effective batch size with
# epochs scaled to match; TODO confirm intent.
# NOTE(review): leading indentation looks stripped by the page scrape — these
# option lines are presumably nested under their section keys in the real file.
data_arguments:
dataset: "cvdb"
block_size: 48
label_block_size: 8
train_subset: 'full'
num_ents: 4000

# Fractions of examples assigned to each question/definition subset.
# The eight values sum to exactly 1.0.
frac_n_qd1consis: 0.25
frac_n_qd1incons: 0.0
frac_n_qd2incons: 0.25
frac_n_q: 0.1
frac_n_d1consis: 0.1
frac_n_d2consis: 0.1
frac_n_no_qd_baseline: 0.08
frac_n_q_no_replacement_baseline: 0.12


model_arguments:
seq2seq: False
max_new_tokens: 8
# Active model; commented alternatives are smaller debug-sized models.
model_name_or_path: "EleutherAI/pythia-1b-deduped"
#model_name_or_path: "EleutherAI/pythia-410m-deduped"
# model_name_or_path: "gpt2"
# model_name_or_path: "EleutherAI/pythia-160m-deduped"


training_arguments:
output_dir: 'experiments/temp'
bf16: True
per_device_train_batch_size: 256
per_device_eval_batch_size: 256
num_train_epochs: 10
optim: "adafactor"
overwrite_output_dir: True
auto_find_batch_size: False
save_strategy: "no"
load_best_model_at_end: False
evaluation_strategy: 'epoch'

do_train: True
do_eval: True
do_sweeps: False
save_each_epochs: 0
eval_each_epochs: 2
eval_callback_type: "pipeline" # pipeline or generate
calculate_grad_variance: True
# Subset keys whose gradients are tracked/compared for the alignment analysis.
grad_keys: "train_defs_d1consis,train_defs_d2consis,d1consis,d2consis"

experiment_arguments: # common experiment arguments
define_experiment: True
numeric_experiment: False
# NOTE(review): the same name_prefix and start_seed (900) appear in the two
# other d1consd2cons configs in this commit — confirm their outputs/run names
# cannot collide or overwrite each other.
name_prefix: "all_grad_alignment_experiment_1stage_d1consd2cons"
n_stages: 1
n_seeds: 10
n_seeds_stage2: 1
start_seed: 900
slurm: True
n_gpu_hours: 36


define_experiment_arguments:
def_order: "tve"
entity_association_test_sets: True


numeric_experiment_arguments:
modular_experiment_baseline: False
modular_experiment: False
num_choice_experiment: False


# overrides specified parameters
first_stage_arguments:
train_subset: 'stage1'
num_train_epochs: 41
gradient_accumulation_steps: 8

# NOTE(review): n_stages is 1 above, so these stage-2 overrides are
# presumably unused in this run — confirm before relying on them.
second_stage_arguments:
train_subset: 'stage2'
num_train_epochs: 10
gradient_accumulation_steps: 8
dont_save_in_the_end: True
save_each_epochs: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Single-stage gradient-alignment experiment config (file added in this commit).
# This variant tracks the d1consis / d2consis subsets. Among the three
# d1consd2cons siblings in this commit, the files differ only in
# eval_each_epochs, stage-1 num_train_epochs, and gradient_accumulation_steps
# (here: 2 / 21 / 2) — apparently a sweep over effective batch size with
# epochs scaled to match; TODO confirm intent.
# NOTE(review): leading indentation looks stripped by the page scrape — these
# option lines are presumably nested under their section keys in the real file.
data_arguments:
dataset: "cvdb"
block_size: 48
label_block_size: 8
train_subset: 'full'
num_ents: 4000

# Fractions of examples assigned to each question/definition subset.
# The eight values sum to exactly 1.0.
frac_n_qd1consis: 0.25
frac_n_qd1incons: 0.0
frac_n_qd2incons: 0.25
frac_n_q: 0.1
frac_n_d1consis: 0.1
frac_n_d2consis: 0.1
frac_n_no_qd_baseline: 0.08
frac_n_q_no_replacement_baseline: 0.12


model_arguments:
seq2seq: False
max_new_tokens: 8
# Active model; commented alternatives are smaller debug-sized models.
model_name_or_path: "EleutherAI/pythia-1b-deduped"
#model_name_or_path: "EleutherAI/pythia-410m-deduped"
# model_name_or_path: "gpt2"
# model_name_or_path: "EleutherAI/pythia-160m-deduped"


training_arguments:
output_dir: 'experiments/temp'
bf16: True
per_device_train_batch_size: 256
per_device_eval_batch_size: 256
num_train_epochs: 10
optim: "adafactor"
overwrite_output_dir: True
auto_find_batch_size: False
save_strategy: "no"
load_best_model_at_end: False
evaluation_strategy: 'epoch'

do_train: True
do_eval: True
do_sweeps: False
save_each_epochs: 0
eval_each_epochs: 2
eval_callback_type: "pipeline" # pipeline or generate
calculate_grad_variance: True
# Subset keys whose gradients are tracked/compared for the alignment analysis.
grad_keys: "train_defs_d1consis,train_defs_d2consis,d1consis,d2consis"

experiment_arguments: # common experiment arguments
define_experiment: True
numeric_experiment: False
# NOTE(review): the same name_prefix and start_seed (900) appear in the two
# other d1consd2cons configs in this commit — confirm their outputs/run names
# cannot collide or overwrite each other.
name_prefix: "all_grad_alignment_experiment_1stage_d1consd2cons"
n_stages: 1
n_seeds: 10
n_seeds_stage2: 1
start_seed: 900
slurm: True
n_gpu_hours: 36


define_experiment_arguments:
def_order: "tve"
entity_association_test_sets: True


numeric_experiment_arguments:
modular_experiment_baseline: False
modular_experiment: False
num_choice_experiment: False


# overrides specified parameters
first_stage_arguments:
train_subset: 'stage1'
num_train_epochs: 21
gradient_accumulation_steps: 2

# NOTE(review): n_stages is 1 above, so these stage-2 overrides are
# presumably unused in this run — confirm before relying on them.
second_stage_arguments:
train_subset: 'stage2'
num_train_epochs: 10
gradient_accumulation_steps: 2
dont_save_in_the_end: True
save_each_epochs: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Single-stage gradient-alignment experiment config (file added in this commit).
# This variant tracks the d1consis / d2consis subsets. Among the three
# d1consd2cons siblings in this commit, the files differ only in
# eval_each_epochs, stage-1 num_train_epochs, and gradient_accumulation_steps
# (here: 5 / 81 / 32) — apparently a sweep over effective batch size with
# epochs scaled to match; TODO confirm intent.
# NOTE(review): leading indentation looks stripped by the page scrape — these
# option lines are presumably nested under their section keys in the real file.
data_arguments:
dataset: "cvdb"
block_size: 48
label_block_size: 8
train_subset: 'full'
num_ents: 4000

# Fractions of examples assigned to each question/definition subset.
# The eight values sum to exactly 1.0.
frac_n_qd1consis: 0.25
frac_n_qd1incons: 0.0
frac_n_qd2incons: 0.25
frac_n_q: 0.1
frac_n_d1consis: 0.1
frac_n_d2consis: 0.1
frac_n_no_qd_baseline: 0.08
frac_n_q_no_replacement_baseline: 0.12


model_arguments:
seq2seq: False
max_new_tokens: 8
# Active model; commented alternatives are smaller debug-sized models.
model_name_or_path: "EleutherAI/pythia-1b-deduped"
#model_name_or_path: "EleutherAI/pythia-410m-deduped"
# model_name_or_path: "gpt2"
# model_name_or_path: "EleutherAI/pythia-160m-deduped"


training_arguments:
output_dir: 'experiments/temp'
bf16: True
per_device_train_batch_size: 256
per_device_eval_batch_size: 256
num_train_epochs: 10
optim: "adafactor"
overwrite_output_dir: True
auto_find_batch_size: False
save_strategy: "no"
load_best_model_at_end: False
evaluation_strategy: 'epoch'

do_train: True
do_eval: True
do_sweeps: False
save_each_epochs: 0
eval_each_epochs: 5
eval_callback_type: "pipeline" # pipeline or generate
calculate_grad_variance: True
# Subset keys whose gradients are tracked/compared for the alignment analysis.
grad_keys: "train_defs_d1consis,train_defs_d2consis,d1consis,d2consis"

experiment_arguments: # common experiment arguments
define_experiment: True
numeric_experiment: False
# NOTE(review): the same name_prefix and start_seed (900) appear in the two
# other d1consd2cons configs in this commit — confirm their outputs/run names
# cannot collide or overwrite each other.
name_prefix: "all_grad_alignment_experiment_1stage_d1consd2cons"
n_stages: 1
n_seeds: 10
n_seeds_stage2: 1
start_seed: 900
slurm: True
n_gpu_hours: 36


define_experiment_arguments:
def_order: "tve"
entity_association_test_sets: True


numeric_experiment_arguments:
modular_experiment_baseline: False
modular_experiment: False
num_choice_experiment: False


# overrides specified parameters
first_stage_arguments:
train_subset: 'stage1'
num_train_epochs: 81
gradient_accumulation_steps: 32

# NOTE(review): n_stages is 1 above, so these stage-2 overrides are
# presumably unused in this run — confirm before relying on them.
second_stage_arguments:
train_subset: 'stage2'
num_train_epochs: 10
gradient_accumulation_steps: 32
dont_save_in_the_end: True
save_each_epochs: 0

0 comments on commit 0803c50

Please sign in to comment.