Skip to content

Commit

Permalink
Merge branch 'master' into grad_alignment
Browse files: browse the repository at this point in the history
  • Loading branch information
egorkrash authored Feb 19, 2024
2 parents fc699e4 + 3abdedc commit b80d4ae
Show file tree
Hide file tree
Showing 44 changed files with 1,319 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ machine.file*
*hf-cache*
*.pyc
slurm_other
plot_dfs
plot_dfs
7 changes: 4 additions & 3 deletions configs/current_experiment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ data_arguments:
frac_n_qd1incons: 0.0
frac_n_qd2consis: 0.0
frac_n_qd2incons: 0.25
frac_n_q: 0.1
frac_n_qd4consis: 0.1
frac_n_q: 0.0
frac_n_d1consis: 0.08
frac_n_d2consis: 0.08
frac_n_d3consis: 0.08
Expand All @@ -22,7 +23,7 @@ model_arguments:
max_new_tokens: 8

# model_name_or_path: "EleutherAI/pythia-160m-deduped"
model_name_or_path: "EleutherAI/pythia-410m-deduped"
model_name_or_path: "EleutherAI/pythia-1b-deduped"
# model_name_or_path: "EleutherAI/pythia-2.8b-deduped"


Expand All @@ -49,7 +50,7 @@ training_arguments:
experiment_arguments: # main experiment arguments
define_experiment: True
numeric_experiment: False
name_prefix: "entAttr_d3cons"
name_prefix: "qd4exp"
n_stages: 2
n_seeds: 20
n_seeds_stage2: 5
Expand Down
76 changes: 76 additions & 0 deletions configs/vary_bs/pythia-1b/pythia1b_cvdb_bs128_1stage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Single-stage (n_stages: 1) "cvdb" config for EleutherAI/pythia-1b-deduped with
# per_device_train_batch_size 128; one of the configs/vary_bs/pythia-1b configs.
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: "full"
  num_ents: 4000

  # The frac_n_* values below sum to 1.0.
  # Presumably each is the share of num_ents given to that subset - TODO confirm.
  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2consis: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q_no_replacement_baseline: 0.1
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_d3consis: 0.0
  frac_n_no_qd_baseline: 0.1


model_arguments:
  seq2seq: False
  max_new_tokens: 8
  # Alternative model sizes, kept commented out for quick switching.
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"
  # model_name_or_path: "EleutherAI/pythia-2.8b-deduped"
  model_name_or_path: "EleutherAI/pythia-1b-deduped"


training_arguments:
  output_dir: "experiments/temp"
  bf16: True
  # The batch size is the variable this config exists to set.
  per_device_train_batch_size: 128
  per_device_eval_batch_size: 256
  optim: "adafactor"
  overwrite_output_dir: True
  # NOTE(review): if these keys are forwarded to Hugging Face TrainingArguments,
  # auto_find_batch_size lets the Trainer lower the batch size after an
  # out-of-memory error, so the effective batch size could differ from the one
  # set above - confirm this is acceptable for a batch-size comparison.
  auto_find_batch_size: True
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: "epoch"

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 1
  eval_callback_type: "pipeline"  # pipeline or generate

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "entAttr_bs128"
  n_stages: 1
  n_seeds: 5
  # NOTE(review): n_stages is 1, so this is presumably unused here - confirm.
  n_seeds_stage2: 3
  start_seed: 600
  slurm: True
  n_gpu_hours: 24


define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True


# All numeric-experiment switches are off; numeric_experiment is False above.
numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False


# overrides specified parameters
# (train_subset is also set under data_arguments).
first_stage_arguments:
  train_subset: "full"
  num_train_epochs: 40
  gradient_accumulation_steps: 1
  dont_save_in_the_end: True
76 changes: 76 additions & 0 deletions configs/vary_bs/pythia-1b/pythia1b_cvdb_bs16_1stage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Single-stage (n_stages: 1) "cvdb" config for EleutherAI/pythia-1b-deduped with
# per_device_train_batch_size 16; one of the configs/vary_bs/pythia-1b configs.
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: "full"
  num_ents: 4000

  # The frac_n_* values below sum to 1.0.
  # Presumably each is the share of num_ents given to that subset - TODO confirm.
  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2consis: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q_no_replacement_baseline: 0.1
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_d3consis: 0.0
  frac_n_no_qd_baseline: 0.1


model_arguments:
  seq2seq: False
  max_new_tokens: 8
  # Alternative model sizes, kept commented out for quick switching.
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"
  # model_name_or_path: "EleutherAI/pythia-2.8b-deduped"
  model_name_or_path: "EleutherAI/pythia-1b-deduped"


training_arguments:
  output_dir: "experiments/temp"
  bf16: True
  # The batch size is the variable this config exists to set.
  per_device_train_batch_size: 16
  per_device_eval_batch_size: 256
  optim: "adafactor"
  overwrite_output_dir: True
  # NOTE(review): if these keys are forwarded to Hugging Face TrainingArguments,
  # auto_find_batch_size lets the Trainer lower the batch size after an
  # out-of-memory error, so the effective batch size could differ from the one
  # set above - confirm this is acceptable for a batch-size comparison.
  auto_find_batch_size: True
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: "epoch"

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 1
  eval_callback_type: "pipeline"  # pipeline or generate

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "entAttr_bs16"
  n_stages: 1
  n_seeds: 5
  # NOTE(review): n_stages is 1, so this is presumably unused here - confirm.
  n_seeds_stage2: 3
  start_seed: 600
  slurm: True
  n_gpu_hours: 24


define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True


# All numeric-experiment switches are off; numeric_experiment is False above.
numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False


# overrides specified parameters
# (train_subset is also set under data_arguments).
first_stage_arguments:
  train_subset: "full"
  num_train_epochs: 30
  gradient_accumulation_steps: 1
  dont_save_in_the_end: True
76 changes: 76 additions & 0 deletions configs/vary_bs/pythia-1b/pythia1b_cvdb_bs32_1stage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Single-stage (n_stages: 1) "cvdb" config for EleutherAI/pythia-1b-deduped with
# per_device_train_batch_size 32; one of the configs/vary_bs/pythia-1b configs.
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: "full"
  num_ents: 4000

  # The frac_n_* values below sum to 1.0.
  # Presumably each is the share of num_ents given to that subset - TODO confirm.
  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2consis: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q_no_replacement_baseline: 0.1
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_d3consis: 0.0
  frac_n_no_qd_baseline: 0.1


model_arguments:
  seq2seq: False
  max_new_tokens: 8
  # Alternative model sizes, kept commented out for quick switching.
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"
  # model_name_or_path: "EleutherAI/pythia-2.8b-deduped"
  model_name_or_path: "EleutherAI/pythia-1b-deduped"


training_arguments:
  output_dir: "experiments/temp"
  bf16: True
  # The batch size is the variable this config exists to set.
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 256
  optim: "adafactor"
  overwrite_output_dir: True
  # NOTE(review): if these keys are forwarded to Hugging Face TrainingArguments,
  # auto_find_batch_size lets the Trainer lower the batch size after an
  # out-of-memory error, so the effective batch size could differ from the one
  # set above - confirm this is acceptable for a batch-size comparison.
  auto_find_batch_size: True
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: "epoch"

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 1
  eval_callback_type: "pipeline"  # pipeline or generate

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "entAttr_bs32"
  n_stages: 1
  n_seeds: 5
  # NOTE(review): n_stages is 1, so this is presumably unused here - confirm.
  n_seeds_stage2: 3
  start_seed: 600
  slurm: True
  n_gpu_hours: 24


define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True


# All numeric-experiment switches are off; numeric_experiment is False above.
numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False


# overrides specified parameters
# (train_subset is also set under data_arguments).
first_stage_arguments:
  train_subset: "full"
  num_train_epochs: 30
  gradient_accumulation_steps: 1
  dont_save_in_the_end: True
76 changes: 76 additions & 0 deletions configs/vary_bs/pythia-1b/pythia1b_cvdb_bs64_1stage.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Single-stage (n_stages: 1) "cvdb" config for EleutherAI/pythia-1b-deduped with
# per_device_train_batch_size 64; one of the configs/vary_bs/pythia-1b configs.
data_arguments:
  dataset: "cvdb"
  block_size: 48
  label_block_size: 8
  train_subset: "full"
  num_ents: 4000

  # The frac_n_* values below sum to 1.0.
  # Presumably each is the share of num_ents given to that subset - TODO confirm.
  frac_n_qd1consis: 0.25
  frac_n_qd1incons: 0.0
  frac_n_qd2consis: 0.0
  frac_n_qd2incons: 0.25
  frac_n_q_no_replacement_baseline: 0.1
  frac_n_q: 0.1
  frac_n_d1consis: 0.1
  frac_n_d2consis: 0.1
  frac_n_d3consis: 0.0
  frac_n_no_qd_baseline: 0.1


model_arguments:
  seq2seq: False
  max_new_tokens: 8
  # Alternative model sizes, kept commented out for quick switching.
  # model_name_or_path: "EleutherAI/pythia-410m-deduped"
  # model_name_or_path: "EleutherAI/pythia-160m-deduped"
  # model_name_or_path: "EleutherAI/pythia-2.8b-deduped"
  model_name_or_path: "EleutherAI/pythia-1b-deduped"


training_arguments:
  output_dir: "experiments/temp"
  bf16: True
  # The batch size is the variable this config exists to set.
  per_device_train_batch_size: 64
  per_device_eval_batch_size: 256
  optim: "adafactor"
  overwrite_output_dir: True
  # NOTE(review): if these keys are forwarded to Hugging Face TrainingArguments,
  # auto_find_batch_size lets the Trainer lower the batch size after an
  # out-of-memory error, so the effective batch size could differ from the one
  # set above - confirm this is acceptable for a batch-size comparison.
  auto_find_batch_size: True
  save_strategy: "no"
  load_best_model_at_end: False
  evaluation_strategy: "epoch"

  do_train: True
  do_eval: True
  do_sweeps: False
  save_each_epochs: 0
  eval_each_epochs: 1
  eval_callback_type: "pipeline"  # pipeline or generate

experiment_arguments:  # common experiment arguments
  define_experiment: True
  numeric_experiment: False
  name_prefix: "entAttr_bs64"
  n_stages: 1
  n_seeds: 5
  # NOTE(review): n_stages is 1, so this is presumably unused here - confirm.
  n_seeds_stage2: 3
  start_seed: 600
  slurm: True
  n_gpu_hours: 24


define_experiment_arguments:
  def_order: "tve"
  entity_association_test_sets: True


# All numeric-experiment switches are off; numeric_experiment is False above.
numeric_experiment_arguments:
  modular_experiment_baseline: False
  modular_experiment: False
  num_choice_experiment: False


# overrides specified parameters
# (train_subset is also set under data_arguments).
first_stage_arguments:
  train_subset: "full"
  num_train_epochs: 30
  gradient_accumulation_steps: 1
  dont_save_in_the_end: True
Loading

0 comments on commit b80d4ae

Please sign in to comment.