pairwise logging
egorkrash committed Jan 29, 2024
1 parent f41bb36 commit 5c66fdc
Showing 9 changed files with 549 additions and 32 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -23,4 +23,5 @@ two_stage_finetuning_nums.py
 *.out
 machine.file*
 *hf-cache*
-*.pyc
+*.pyc
+slurm_other
83 changes: 83 additions & 0 deletions configs/grad_alignment/new_experiments/pythia1B_bs1024_1stage.yaml
@@ -0,0 +1,83 @@
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: 'full'
  num_ents: 4000

  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_no_qd_baseline: 0.1
  frac_n_q_no_replacement_baseline: 0.1


model_arguments:
  seq2seq: False
  max_new_tokens: 8
  model_name_or_path: "EleutherAI/pythia-1b-deduped"
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "gpt2"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"


training_arguments:
  output_dir: 'experiments/temp'
  bf16: True
  per_device_train_batch_size: 256
  per_device_eval_batch_size: 256
  num_train_epochs: 10
  optim: "adafactor"
  overwrite_output_dir: True
  auto_find_batch_size: False
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: 'epoch'

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 2
  eval_callback_type: "pipeline"  # pipeline or generate
  calculate_grad_variance: True
  grad_keys: "train_defs_qd1consis,train_defs_qd2incons,train_questions_qd1consis,train_questions_qd2incons"

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "grad_alignment_experiment_1stage_old"
  n_stages: 1
  n_seeds: 10
  n_seeds_stage2: 1
  start_seed: 900
  slurm: True
  n_gpu_hours: 36


define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True


numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False


# overrides specified parameters
first_stage_arguments:
  train_subset: 'stage1'
  num_train_epochs: 20
  gradient_accumulation_steps: 4

second_stage_arguments:
  train_subset: 'stage2'
  num_train_epochs: 10
  gradient_accumulation_steps: 4
  dont_save_in_the_end: True
  save_each_epochs: 0
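An aside for readers scanning this diff: each of these configs layers stage-specific overrides (first_stage_arguments, second_stage_arguments) on top of the base argument groups. A minimal sketch of how such a file could be consumed, assuming PyYAML and a plain dict merge; the repo's actual loader and argument classes are not part of this commit:

```python
# Sketch only: assumes PyYAML is installed and that stage sections simply
# override the flattened base groups. The real loader may differ.
import yaml

def load_stage_config(path: str, stage: int) -> dict:
    with open(path) as f:
        cfg = yaml.safe_load(f)
    merged = {}
    # Flatten the base argument groups into a single dict.
    for group in ("data_arguments", "model_arguments",
                  "training_arguments", "experiment_arguments"):
        merged.update(cfg.get(group) or {})
    # Apply the per-stage overrides on top of the base values.
    overrides = "first_stage_arguments" if stage == 1 else "second_stage_arguments"
    merged.update(cfg.get(overrides) or {})
    return merged

cfg = load_stage_config(
    "configs/grad_alignment/new_experiments/pythia1B_bs1024_1stage.yaml", stage=1)
# Stage 1 overrides win: train_subset == 'stage1', num_train_epochs == 20.
```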
@@ -0,0 +1,83 @@
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: 'full'
  num_ents: 4000

  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_no_qd_baseline: 0.1
  frac_n_q_no_replacement_baseline: 0.1


model_arguments:
  seq2seq: False
  max_new_tokens: 8
  model_name_or_path: "EleutherAI/pythia-1b-deduped"
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "gpt2"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"


training_arguments:
  output_dir: 'experiments/temp'
  bf16: True
  per_device_train_batch_size: 256
  per_device_eval_batch_size: 256
  num_train_epochs: 10
  optim: "adafactor"
  overwrite_output_dir: True
  auto_find_batch_size: False
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: 'epoch'

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 2
  eval_callback_type: "pipeline"  # pipeline or generate
  calculate_grad_variance: True
  grad_keys: "train_defs_d1consis,train_defs_d2consis,d1consis,d2consis"

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "grad_alignment_experiment_1stage_d1consd2cons"
  n_stages: 1
  n_seeds: 10
  n_seeds_stage2: 1
  start_seed: 900
  slurm: True
  n_gpu_hours: 36


define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True


numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False


# overrides specified parameters
first_stage_arguments:
  train_subset: 'stage1'
  num_train_epochs: 20
  gradient_accumulation_steps: 4

second_stage_arguments:
  train_subset: 'stage2'
  num_train_epochs: 10
  gradient_accumulation_steps: 4
  dont_save_in_the_end: True
  save_each_epochs: 0
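This variant differs from the previous config only in grad_keys and name_prefix: the subsets compared here are train_defs_d1consis vs. train_defs_d2consis and d1consis vs. d2consis. The commit title suggests gradient alignment is now logged for each pair of these keys. A hypothetical sketch of such a measurement, using cosine similarity between flattened gradients (all names below are illustrative, not taken from this diff):

```python
# Illustrative sketch of pairwise gradient-alignment logging; the repo's
# actual callback (enabled by calculate_grad_variance) is not shown here.
from itertools import combinations
import torch
import torch.nn.functional as F

def flat_grad(model, loss_fn, batch) -> torch.Tensor:
    """Gradient of the loss on one batch, flattened into a single vector."""
    model.zero_grad()
    loss_fn(model, batch).backward()
    return torch.cat([p.grad.flatten() for p in model.parameters()
                      if p.grad is not None])

def pairwise_alignment(model, loss_fn, batches_by_key: dict) -> dict:
    """Cosine similarity of gradients for every pair of grad_keys subsets."""
    grads = {k: flat_grad(model, loss_fn, b) for k, b in batches_by_key.items()}
    return {(a, b): F.cosine_similarity(grads[a], grads[b], dim=0).item()
            for a, b in combinations(grads, 2)}
```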
@@ -0,0 +1,83 @@
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: 'full'
  num_ents: 4000

  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_no_qd_baseline: 0.1
  frac_n_q_no_replacement_baseline: 0.1


model_arguments:
  seq2seq: False
  max_new_tokens: 8
  model_name_or_path: "EleutherAI/pythia-1b-deduped"
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "gpt2"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"


training_arguments:
  output_dir: 'experiments/temp'
  bf16: True
  per_device_train_batch_size: 256
  per_device_eval_batch_size: 256
  num_train_epochs: 10
  optim: "adafactor"
  overwrite_output_dir: True
  auto_find_batch_size: False
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: 'epoch'

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 2
  eval_callback_type: "pipeline"  # pipeline or generate
  calculate_grad_variance: True
  grad_keys: "train_defs_qd1consis,train_defs_qd2incons,qd1consis,qd2incons"

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "grad_alignment_experiment_2stage_qdconsincons"
  n_stages: 1
  n_seeds: 10
  n_seeds_stage2: 1
  start_seed: 900
  slurm: True
  n_gpu_hours: 36


define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True


numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False


# overrides specified parameters
first_stage_arguments:
  train_subset: 'stage1'
  num_train_epochs: 20
  gradient_accumulation_steps: 4

second_stage_arguments:
  train_subset: 'stage2'
  num_train_epochs: 10
  gradient_accumulation_steps: 4
  dont_save_in_the_end: True
  save_each_epochs: 0
83 changes: 83 additions & 0 deletions configs/grad_alignment/new_experiments/pythia1B_bs4096_1stage.yaml
@@ -0,0 +1,83 @@
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: 'full'
  num_ents: 4000

  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_no_qd_baseline: 0.1
  frac_n_q_no_replacement_baseline: 0.1


model_arguments:
  seq2seq: False
  max_new_tokens: 8
  model_name_or_path: "EleutherAI/pythia-1b-deduped"
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "gpt2"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"


training_arguments:
  output_dir: 'experiments/temp'
  bf16: True
  per_device_train_batch_size: 256
  per_device_eval_batch_size: 256
  num_train_epochs: 10
  optim: "adafactor"
  overwrite_output_dir: True
  auto_find_batch_size: False
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: 'epoch'

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 2
  eval_callback_type: "pipeline"  # pipeline or generate
  calculate_grad_variance: True
  grad_keys: "train_defs_qd1consis,train_defs_qd2incons,train_questions_qd1consis,train_questions_qd2incons"

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "grad_alignment_experiment_1stage_old"
  n_stages: 1
  n_seeds: 10
  n_seeds_stage2: 1
  start_seed: 900
  slurm: True
  n_gpu_hours: 36


define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True


numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False


# overrides specified parameters
first_stage_arguments:
  train_subset: 'stage1'
  num_train_epochs: 20
  gradient_accumulation_steps: 16

second_stage_arguments:
  train_subset: 'stage2'
  num_train_epochs: 10
  gradient_accumulation_steps: 16
  dont_save_in_the_end: True
  save_each_epochs: 0
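The only substantive difference from the bs1024 config is gradient_accumulation_steps: 16, which yields the effective batch size named in the file. The arithmetic, assuming a single device (multiply by the device count otherwise):

```python
per_device_train_batch_size = 256
gradient_accumulation_steps = 16   # the bs1024 configs use 4
effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps
print(effective_batch_size)  # 4096 (256 * 4 = 1024 for the bs1024 variants)
```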