Merge branch 'master' into pwd_locked

krasheninnikov committed Feb 22, 2024
2 parents f681e58 + 64755ac commit 7e30c71
Showing 18 changed files with 986 additions and 208 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -24,4 +24,5 @@ two_stage_finetuning_nums.py
 machine.file*
 *hf-cache*
 *.pyc
 slurm_other
+plot_dfs
2 changes: 1 addition & 1 deletion README.md
@@ -29,7 +29,7 @@ cd internalization
 ```bash
 pip install -r requirements.txt
 # download the datasets from Google Drive
-gdown --folder https://drive.google.com/drive/folders/1KQDClI3cbFzPhzfknF2xmtqE-aIW1EDf?usp=sharing
+gdown --folder 'https://drive.google.com/drive/folders/1KQDClI3cbFzPhzfknF2xmtqE-aIW1EDf?usp=sharing'
 ```

 - **Step 3 (Optional).**
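A note on the README change above: the only difference is quoting the URL. Unquoted, the `?` in `?usp=sharing` is a shell glob character, so zsh aborts with "no matches found" and bash may expand it if a matching filename exists; the quoted form guarantees gdown receives the full URL as one argument.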
83 changes: 83 additions & 0 deletions configs/grad_alignment/new_experiments/pythia1B_bs1024_1stage.yaml
@@ -0,0 +1,83 @@
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: 'full'
  num_ents: 4000

  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_no_qd_baseline: 0.1
  frac_n_q_no_replacement_baseline: 0.1

model_arguments:
  seq2seq: False
  max_new_tokens: 8
  model_name_or_path: "EleutherAI/pythia-1b-deduped"
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "gpt2"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"

training_arguments:
  output_dir: 'experiments/temp'
  bf16: True
  per_device_train_batch_size: 256
  per_device_eval_batch_size: 256
  num_train_epochs: 10
  optim: "adafactor"
  overwrite_output_dir: True
  auto_find_batch_size: False
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: 'epoch'

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 2
  eval_callback_type: "pipeline"  # pipeline or generate
  calculate_grad_variance: True
  grad_keys: "train_defs_qd1consis,train_defs_qd2incons,train_questions_qd1consis,train_questions_qd2incons"

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "grad_alignment_experiment_1stage_old"
  n_stages: 1
  n_seeds: 10
  n_seeds_stage2: 1
  start_seed: 900
  slurm: True
  n_gpu_hours: 36

define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True

numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False

# overrides specified parameters
first_stage_arguments:
  train_subset: 'stage1'
  num_train_epochs: 20
  gradient_accumulation_steps: 4

second_stage_arguments:
  train_subset: 'stage2'
  num_train_epochs: 10
  gradient_accumulation_steps: 4
  dont_save_in_the_end: True
  save_each_epochs: 0
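A note on how these YAMLs appear to be consumed: the trailing `first_stage_arguments` / `second_stage_arguments` sections are marked as overrides, i.e. values there replace the matching top-level keys for that stage. The repository's actual loader is not part of this diff, so the helper below (`load_stage_config` and its flat-merge strategy) is only a minimal sketch of that pattern.

```python
import yaml

def load_stage_config(path, stage):
    """Flatten one experiment YAML for a given stage ('first_stage_arguments'
    or 'second_stage_arguments'); stage values override the base sections."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    merged = {}
    for group in ("data_arguments", "model_arguments",
                  "training_arguments", "experiment_arguments"):
        merged.update(cfg.get(group, {}))
    merged.update(cfg.get(stage, {}))  # per-stage overrides win
    return merged

cfg = load_stage_config(
    "configs/grad_alignment/new_experiments/pythia1B_bs1024_1stage.yaml",
    "first_stage_arguments")
assert cfg["train_subset"] == "stage1" and cfg["num_train_epochs"] == 20
```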
@@ -0,0 +1,83 @@
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: 'full'
  num_ents: 4000

  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_no_qd_baseline: 0.08
  frac_n_q_no_replacement_baseline: 0.12

model_arguments:
  seq2seq: False
  max_new_tokens: 8
  model_name_or_path: "EleutherAI/pythia-1b-deduped"
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "gpt2"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"

training_arguments:
  output_dir: 'experiments/temp'
  bf16: True
  per_device_train_batch_size: 256
  per_device_eval_batch_size: 256
  num_train_epochs: 10
  optim: "adafactor"
  overwrite_output_dir: True
  auto_find_batch_size: False
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: 'epoch'

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 2
  eval_callback_type: "pipeline"  # pipeline or generate
  calculate_grad_variance: True
  grad_keys: "train_defs_d1consis,train_defs_d2consis,d1consis,d2consis"

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "all_grad_alignment_experiment_1stage_d1consd2cons"
  n_stages: 1
  n_seeds: 10
  n_seeds_stage2: 1
  start_seed: 900
  slurm: True
  n_gpu_hours: 36

define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True

numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False

# overrides specified parameters
first_stage_arguments:
  train_subset: 'stage1'
  num_train_epochs: 20
  gradient_accumulation_steps: 4

second_stage_arguments:
  train_subset: 'stage2'
  num_train_epochs: 10
  gradient_accumulation_steps: 4
  dont_save_in_the_end: True
  save_each_epochs: 0
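One property worth checking in the `data_arguments` blocks: the `frac_n_*` fields look like fractions of `num_ents`, and in every config in this diff they sum to 1.0, consistent with partitioning the 4000 entities into disjoint subsets. A quick arithmetic check under that (assumed) interpretation:

```python
# frac_n_* values from the config above, read as fractions of num_ents.
fracs = {
    "qd1consis": 0.25, "qd1incons": 0.0, "qd2incons": 0.25,
    "q": 0.1, "d1consis": 0.1, "d2consis": 0.1,
    "no_qd_baseline": 0.08, "q_no_replacement_baseline": 0.12,
}
num_ents = 4000
assert abs(sum(fracs.values()) - 1.0) < 1e-9  # subsets cover all entities
counts = {name: round(f * num_ents) for name, f in fracs.items()}
print(counts)  # e.g. 1000 qd1consis entities, 320 no_qd_baseline, ...
```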
@@ -0,0 +1,83 @@
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: 'full'
  num_ents: 4000

  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_no_qd_baseline: 0.08
  frac_n_q_no_replacement_baseline: 0.12

model_arguments:
  seq2seq: False
  max_new_tokens: 8
  model_name_or_path: "EleutherAI/pythia-1b-deduped"
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "gpt2"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"

training_arguments:
  output_dir: 'experiments/temp'
  bf16: True
  per_device_train_batch_size: 256
  per_device_eval_batch_size: 256
  num_train_epochs: 10
  optim: "adafactor"
  overwrite_output_dir: True
  auto_find_batch_size: False
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: 'epoch'

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 2
  eval_callback_type: "pipeline"  # pipeline or generate
  calculate_grad_variance: True
  grad_keys: "train_defs_qd1consis,train_defs_qd2incons,qd1consis,qd2incons"

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "all_grad_alignment_experiment_1stage_qd1consqd2incons"
  n_stages: 1
  n_seeds: 10
  n_seeds_stage2: 1
  start_seed: 900
  slurm: True
  n_gpu_hours: 36

define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True

numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False

# overrides specified parameters
first_stage_arguments:
  train_subset: 'stage1'
  num_train_epochs: 20
  gradient_accumulation_steps: 4

second_stage_arguments:
  train_subset: 'stage2'
  num_train_epochs: 10
  gradient_accumulation_steps: 4
  dont_save_in_the_end: True
  save_each_epochs: 0
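`calculate_grad_variance: True` together with `grad_keys` suggests these runs track how gradients from the listed subsets relate. The metric itself is not in this diff; one standard choice for "gradient alignment" is cosine similarity between the mean gradients of two subsets, sketched below. Everything here (the batch format, `mean_grad`, `grad_alignment`, and the Hugging Face-style `model(...).loss` interface) is an assumption for illustration, not the repository's implementation.

```python
import torch

def mean_grad(model, batches):
    """Average flattened gradient of the loss over a list of batches."""
    total, n = None, 0
    for input_ids, labels in batches:
        model.zero_grad()
        loss = model(input_ids=input_ids, labels=labels).loss
        loss.backward()
        flat = torch.cat([p.grad.flatten() for p in model.parameters()
                          if p.grad is not None])
        total = flat if total is None else total + flat
        n += 1
    return total / n

def grad_alignment(model, subset_a, subset_b):
    """Cosine similarity between the mean gradients of two data subsets."""
    g_a, g_b = mean_grad(model, subset_a), mean_grad(model, subset_b)
    return torch.nn.functional.cosine_similarity(g_a, g_b, dim=0).item()

# Hypothetical usage, pairing keys from grad_keys above:
# grad_alignment(model, batches["train_defs_qd1consis"], batches["qd1consis"])
```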
@@ -0,0 +1,83 @@
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: 'full'
  num_ents: 4000

  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_no_qd_baseline: 0.08
  frac_n_q_no_replacement_baseline: 0.12

model_arguments:
  seq2seq: False
  max_new_tokens: 8
  model_name_or_path: "EleutherAI/pythia-1b-deduped"
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "gpt2"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"

training_arguments:
  output_dir: 'experiments/temp'
  bf16: True
  per_device_train_batch_size: 256
  per_device_eval_batch_size: 256
  num_train_epochs: 10
  optim: "adafactor"
  overwrite_output_dir: True
  auto_find_batch_size: False
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: 'epoch'

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 15
  eval_callback_type: "pipeline"  # pipeline or generate
  calculate_grad_variance: True
  grad_keys: "train_defs_d1consis,train_defs_d2consis,d1consis,d2consis"

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "all_grad_alignment_experiment_1stage_d1consd2cons"
  n_stages: 1
  n_seeds: 10
  n_seeds_stage2: 1
  start_seed: 900
  slurm: True
  n_gpu_hours: 36

define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True

numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False

# overrides specified parameters
first_stage_arguments:
  train_subset: 'stage1'
  num_train_epochs: 151
  gradient_accumulation_steps: 64

second_stage_arguments:
  train_subset: 'stage2'
  num_train_epochs: 10
  gradient_accumulation_steps: 16
  dont_save_in_the_end: True
  save_each_epochs: 0
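Finally, note the batch-size arithmetic in this last config: `per_device_train_batch_size: 256` with stage-1 `gradient_accumulation_steps: 64` gives 256 × 64 = 16,384 examples per optimizer step and device, versus 256 × 4 = 1,024 in the three configs above (which matches the `bs1024` in the first file's name). A quick check:

```python
# Effective per-device batch = micro-batch size x gradient accumulation steps.
per_device = 256
assert per_device * 4 == 1024    # first three configs ("bs1024")
assert per_device * 64 == 16384  # this config's first stage
```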