From 382de19f3df7bd1a9e856388dcafdfbc3623ffdc Mon Sep 17 00:00:00 2001 From: Leonardo Mauro Date: Thu, 2 Nov 2023 17:54:40 -0300 Subject: [PATCH] feat: add dvc sample and sentiment analysis data (#2) * feat: init dvc * feat: add data sample * feat: add sentiment-analysis data * feat: add output folder --- .dvc/.gitignore | 3 +++ .dvc/config | 4 ++++ .dvcignore | 3 +++ README.md | 3 +++ data/.gitignore | 2 ++ data/data.xml.dvc | 5 +++++ data/output/.gitkeep | 0 data/sentiment-analysis-on-movie-reviews.zip.dvc | 5 +++++ requirements.txt | 2 ++ 9 files changed, 27 insertions(+) create mode 100644 .dvc/.gitignore create mode 100644 .dvc/config create mode 100644 .dvcignore create mode 100644 data/.gitignore create mode 100644 data/data.xml.dvc create mode 100644 data/output/.gitkeep create mode 100644 data/sentiment-analysis-on-movie-reviews.zip.dvc create mode 100644 requirements.txt diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000..5e0e5be --- /dev/null +++ b/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = myremote +['remote "myremote"'] + url = /tmp/dvcstore diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/README.md b/README.md index 5d2e4b0..4696a10 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ # dvc-luigi This is a learning repository about DVC Data Version Control and Luigi Pipelines + +- setup https://github.com/Kaggle/kaggle-api +- `kaggle competitions download -c sentiment-analysis-on-movie-reviews -p data` \ No newline at end of file diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 0000000..d8ee7f0 --- /dev/null +++ b/data/.gitignore @@ -0,0 +1,2 @@ +/data.xml +/sentiment-analysis-on-movie-reviews.zip diff --git a/data/data.xml.dvc b/data/data.xml.dvc new file mode 100644 index 0000000..1fd0f22 --- /dev/null +++ b/data/data.xml.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 22a1a2931c8370d3aeedd7183606fd7f + size: 14445097 + hash: md5 + path: data.xml diff --git a/data/output/.gitkeep b/data/output/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/sentiment-analysis-on-movie-reviews.zip.dvc b/data/sentiment-analysis-on-movie-reviews.zip.dvc new file mode 100644 index 0000000..6a14689 --- /dev/null +++ b/data/sentiment-analysis-on-movie-reviews.zip.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 297ae2983c4a07603ed2fd31613c1b5e + size: 1991138 + hash: md5 + path: sentiment-analysis-on-movie-reviews.zip diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7216041 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +kaggle==1.5.16 +dvc==3.28.0