diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000..5e0e5be --- /dev/null +++ b/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = myremote +['remote "myremote"'] + url = /tmp/dvcstore diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/README.md b/README.md index 5d2e4b0..4696a10 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ # dvc-luigi This is a learning repository about DVC Data Version Control and Luigi Pipelines + +- setup https://github.com/Kaggle/kaggle-api +- `kaggle competitions download -c sentiment-analysis-on-movie-reviews -p data` \ No newline at end of file diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..d8ee7f0 --- /dev/null +++ b/data/.gitignore @@ -0,0 +1,2 @@ +/data.xml +/sentiment-analysis-on-movie-reviews.zip diff --git a/data/data.xml.dvc b/data/data.xml.dvc new file mode 100644 index 0000000..1fd0f22 --- /dev/null +++ b/data/data.xml.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 22a1a2931c8370d3aeedd7183606fd7f + size: 14445097 + hash: md5 + path: data.xml diff --git a/data/output/.gitkeep b/data/output/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/sentiment-analysis-on-movie-reviews.zip.dvc b/data/sentiment-analysis-on-movie-reviews.zip.dvc new file mode 100644 index 0000000..6a14689 --- /dev/null +++ b/data/sentiment-analysis-on-movie-reviews.zip.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 297ae2983c4a07603ed2fd31613c1b5e + size: 1991138 + hash: md5 + path: sentiment-analysis-on-movie-reviews.zip diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7216041 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +kaggle==1.5.16 +dvc==3.28.0