-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Organize scripts for generating paper tables/figures
- Loading branch information
1 parent
27b2591
commit a246f3a
Showing
7 changed files
with
116 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#!/bin/bash
# Sweep for the "figure2" outputs.
#
# For every Pythia deduped model size and every "<lo> <hi> <step>" triple
# below, prints "ckpt<step>" and runs run.py with configs/mi.json against the
# model revision "step<step>". The member dataset is the pre-sampled token
# index file named after <lo>-<hi>, the non-member dataset is the tokenized
# full_pile test split, and the source label is "<lo>-<hi>-pile".
#
# Run from the directory that contains run.py and configs/mi.json.
version=figure2
n_samples=1000

# Each entry is "<lo> <hi> <step>"; order is kept exactly as in the sweep.
ranges=(
  "900 1000 1000"
  "9900 10000 10000"
  "19900 20000 20000"
  "29900 30000 30000"
  "39900 40000 40000"
  "49900 50000 50000"
  "4900 5000 5000"
  "14900 15000 15000"
  "24900 25000 25000"
  "34900 35000 35000"
  "44900 45000 45000"
  "59900 60000 60000"
  "69900 70000 70000"
  "79900 80000 80000"
  "89900 90000 90000"
  "98900 99000 99000"
  "54900 55000 55000"
  "64900 65000 65000"
  "74900 75000 75000"
  "84900 85000 85000"
  "94900 95000 95000"
)

member_dir=/gscratch/h2lab/micdun/pile-domains/pythia/utils/batch_viewing/token_indicies
nonmember_file=/gscratch/h2lab/micdun/mimir/data/tokenized_test/0.0-0.8/full_pile_${n_samples}/test_tk.npy

for model in "1.4b" "12b" "2.8b" "6.9b"; do
  for triple in "${ranges[@]}"; do
    # Split the triple on whitespace into its three fields.
    read -r lo hi step <<< "$triple"
    echo "ckpt${step}"
    python run.py \
      --config configs/mi.json \
      --base_model "EleutherAI/pythia-${model}-deduped" \
      --revision "step${step}" \
      --presampled_dataset_member "${member_dir}/${lo}-${hi}-indicies-n${n_samples}-samples.npy" \
      --presampled_dataset_nonmember "${nonmember_file}" \
      --n_samples "${n_samples}" \
      --specific_source "${lo}-${hi}-pile" \
      --output_name "${version}" \
      --pretokenized true
  done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash
# Sweep for the "table1" outputs.
#
# For each Pythia model (deduped variant, revision step99000) this runs run.py
# with configs/mi.json on every per-domain source named
# "<subset>_ngram_13_<0.8_truncated", then once more on the "full_pile" source
# with --n_samples 10000.
#
# Run from the directory that contains run.py and configs/mi.json.
# NOTE(review): `knocky` is an external wrapper command not defined in this
# script -- presumably a job-notification helper; confirm it is on PATH.
version=table1
ngram=13

for model in "pythia-160m" "pythia-1.4b" "pythia-2.8b" "pythia-6.9b" "pythia-12b"; do
  for subset in "wikipedia_(en)" "github" "pile_cc" "pubmed_central" "arxiv" "dm_mathematics" "hackernews"; do
    knocky python run.py \
      --config configs/mi.json \
      --revision step99000 \
      --base_model "EleutherAI/${model}-deduped" \
      --specific_source "${subset}_ngram_${ngram}_<0.8_truncated" \
      --output_name "${version}"
  done
  # full_pile specifically
  # Every line except the last needs a trailing backslash: without one after
  # --output_name, the shell ends the command there and tries to execute
  # "--n_samples 10000" as a separate command, so run.py never receives it.
  python run.py \
    --config configs/mi.json \
    --revision step99000 \
    --base_model "EleutherAI/${model}-deduped" \
    --specific_source "full_pile" \
    --output_name "${version}" \
    --n_samples 10000
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/bash
# Sweep for the "table11" outputs.
#
# For each GPT-Neo model this runs run.py with configs/mi.json on every
# per-domain source named "<subset>_ngram_13_<0.8_truncated", then once more
# on the "full_pile" source with --n_samples 10000. No --revision is passed.
#
# Run from the directory that contains run.py and configs/mi.json.
# NOTE(review): `knocky` is an external wrapper command not defined in this
# script -- presumably a job-notification helper; confirm it is on PATH.
version=table11
ngram=13

for model in "gpt-neo-125m" "gpt-neo-1.3B" "gpt-neo-2.7B"; do
  for subset in "wikipedia_(en)" "github" "pile_cc" "pubmed_central" "arxiv" "dm_mathematics" "hackernews"; do
    knocky python run.py \
      --config configs/mi.json \
      --base_model "EleutherAI/${model}" \
      --specific_source "${subset}_ngram_${ngram}_<0.8_truncated" \
      --output_name "${version}"
  done
  # full_pile specifically
  # Every line except the last needs a trailing backslash: without one after
  # --output_name, the shell ends the command there and tries to execute
  # "--n_samples 10000" as a separate command, so run.py never receives it.
  python run.py \
    --config configs/mi.json \
    --base_model "EleutherAI/${model}" \
    --specific_source "full_pile" \
    --output_name "${version}" \
    --n_samples 10000
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash
# Sweep for the "table3" outputs.
#
# Runs run.py with configs/mi.json on EleutherAI/pythia-12b-deduped at
# revision step99000 for every combination of n-gram size (7, 13) and domain
# subset, using the source named "<subset>_ngram_<ngram>_<0.2_truncated".
#
# Run from the directory that contains run.py and configs/mi.json.
# NOTE(review): `knocky` is an external wrapper command not defined in this
# script -- presumably a job-notification helper; confirm it is on PATH.
version=table3

ngram_sizes=("7" "13")
subsets=("wikipedia_(en)" "github" "pubmed_central" "pile_cc" "arxiv")

for ngram in "${ngram_sizes[@]}"; do
  for subset in "${subsets[@]}"; do
    source_name="${subset}_ngram_${ngram}_<0.2_truncated"
    knocky python run.py \
      --config configs/mi.json \
      --revision step99000 \
      --base_model "EleutherAI/pythia-12b-deduped" \
      --specific_source "${source_name}" \
      --output_name "${version}"
  done
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash
# Sweep for the "table8" outputs.
#
# For each Pythia model (the "EleutherAI/<model>" checkpoints, i.e. without
# the "-deduped" suffix, at revision step99000) this runs run.py with
# configs/mi.json on every per-domain source named
# "<subset>_ngram_13_<0.8_truncated", then once more on the "full_pile" source
# with --n_samples 10000.
#
# Run from the directory that contains run.py and configs/mi.json.
# NOTE(review): `knocky` is an external wrapper command not defined in this
# script -- presumably a job-notification helper; confirm it is on PATH.
version=table8
ngram=13

for model in "pythia-160m" "pythia-1.4b" "pythia-2.8b" "pythia-6.9b" "pythia-12b"; do
  for subset in "wikipedia_(en)" "github" "pile_cc" "pubmed_central" "arxiv" "dm_mathematics" "hackernews"; do
    knocky python run.py \
      --config configs/mi.json \
      --revision step99000 \
      --base_model "EleutherAI/${model}" \
      --specific_source "${subset}_ngram_${ngram}_<0.8_truncated" \
      --output_name "${version}"
  done
  # full_pile specifically
  # Every line except the last needs a trailing backslash: without one after
  # --output_name, the shell ends the command there and tries to execute
  # "--n_samples 10000" as a separate command, so run.py never receives it.
  python run.py \
    --config configs/mi.json \
    --revision step99000 \
    --base_model "EleutherAI/${model}" \
    --specific_source "full_pile" \
    --output_name "${version}" \
    --n_samples 10000
done
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
description="Python package for measuring memorization in LLMs", | ||
author="Anshuman Suri, Michael Duan, Niloofar Mireshghallah", | ||
author_email="[email protected]", | ||
version="0.5", | ||
version="0.6", | ||
url="https://github.com/iamgroot42/mimir", | ||
license="MIT", | ||
python_requires=">=3.9", | ||
|