diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..fb43a5b --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @ft-interactive/visual-data-journalism-admins \ No newline at end of file diff --git a/.github/workflows/run-pipeline.yml b/.github/workflows/run-pipeline.yml new file mode 100644 index 0000000..f5a5177 --- /dev/null +++ b/.github/workflows/run-pipeline.yml @@ -0,0 +1,34 @@ +name: run-pipeline + +on: + push: + #schedule: + # - cron: '0 4 * * *' + +jobs: + pipeline: + runs-on: ubuntu-latest + + permissions: + # Allow the action to update the repo + contents: write + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: 3.x + cache: pipenv + + - name: Install pipenv + run: pip install pipenv + + - name: Install dependencies + run: pipenv install --ignore-pipfile + + - name: Run python code + run: pipenv run start + + - uses: stefanzweifel/git-auto-commit-action@v4 + with: + commit_message: Data update diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..470c77f --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.DS_Store +.venv +.env diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..d68c14f --- /dev/null +++ b/Pipfile @@ -0,0 +1,15 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +requests = "*" + +[dev-packages] + +[requires] +python_version = "3" + +[scripts] +start = "python3 run.py" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..5dabbad --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,150 @@ +{ + "_meta": { + "hash": { + "sha256": "ff88c6939e3090788e917cfdecf1af872168b83c8803457853061495493b5a71" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.11" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "hashes": [ + "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1", + "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474" + ], + "markers": "python_version >= '3.6'", + "version": "==2023.11.17" + }, + "charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "idna": { + "hashes": [ + "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca", + "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f" + ], + "markers": "python_version >= '3.5'", + "version": "==3.6" + }, + "requests": { + "hashes": [ + "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", + "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==2.31.0" + }, + "urllib3": { + "hashes": [ + "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3", + "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54" + ], + "markers": "python_version >= '3.8'", + "version": "==2.1.0" + } + }, + "develop": {} +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..2f3174a --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +# actions-python-template + +A template for deploying a python-based pipeline with GitHub Actions. An equivalent template is available for deploying R pipelines with [Github Actions](https://github.com/ft-interactive/actions-pipeline-template) or [CircleCI](https://github.com/ft-interactive/circleci-pipeline-template). Please note that CircleCI is the preferred method of deployment, but we can use GitHub Actions if it provides functionality that is not available in CircleCI. + +## How to use this template + +1. From the template [github page](https://github.com/ft-interactive/actions-python-template) click the green 'Use this template' button at the top. Create a new repo with your project name and then clone it onto your local machine. + +2. Define the functions your pipeline will use in python script files `src/` directory. + +3. Update the `run.py` script to import and call those functions + +4. Update `.github/workflows/run-pipeline.yml` to specify when to run the pipeline. By default it will run on push, but you can uncomment the lines specifying a `schedule` to run it as a cron job. + +5. The workflow is designed to build a package cache, so while first time you run the pipeline will be slow, subsequent runs will be much quicker. + +## How the pipeline works + +GitHub Actions runs the pipeline inside a virtual server. Any files that your python scripts create (usually inside the `data/` directory) will be committed back to the repository when the pipeline finishes its run. + +## Updating the pipeline + +If you need to update the pipeline after it has run on Github, remember to **pull the latest version from Github to your local machine before updating the code**. This is because the Github repo will contain different files in the `data/` folder than your local repo. This is an unavoidable feature of this kind of circular workflow, where data collected by Github Actions is committed back into the online repo. + +## How to schedule the pipeline + +You can schedule the pipeline using the settings in [run-pipeline.yml](.github/workflows/run-pipeline.yml). Uncomment the two lines that begin `schedule` and use crontab syntax to specify how often it should run. If you are not familiar with crontab syntax, see this [guide](https://jasonet.co/posts/scheduled-actions/). + +## GitHub Actions used in the workflow for this project + +- [actions/cache@v3](https://github.com/actions/cache) +- [actions/setup-python@v4](https://github.com/actions/setup-python) +- [stefanzweifel/git-auto-commit-action@v4](https://github.com/stefanzweifel/git-auto-commit-action) diff --git a/data/test.csv b/data/test.csv new file mode 100644 index 0000000..19854b1 --- /dev/null +++ b/data/test.csv @@ -0,0 +1,11 @@ +id,title,brand,price +1,iPhone 9,Apple,549 +2,iPhone X,Apple,899 +3,Samsung Universe 9,Samsung,1249 +4,OPPOF19,OPPO,280 +5,Huawei P30,Huawei,499 +6,MacBook Pro,Apple,1749 +7,Samsung Galaxy Book,Samsung,1499 +8,Microsoft Surface Laptop 4,Microsoft Surface,1499 +9,Infinix INBOOK,Infinix,1099 +10,HP Pavilion 15-DK1056WM,HP Pavilion,1099 diff --git a/run.py b/run.py new file mode 100644 index 0000000..dd5d1a5 --- /dev/null +++ b/run.py @@ -0,0 +1,8 @@ +# Import code from the src/ directory + +from src.fetch import get_data +from src.save import write_data +from os.path import abspath + +data = get_data() +write_data(abspath("./data/test.csv"), data) \ No newline at end of file diff --git a/src/.gitkeep b/src/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/src/fetch.py b/src/fetch.py new file mode 100644 index 0000000..254c58c --- /dev/null +++ b/src/fetch.py @@ -0,0 +1,7 @@ +import requests + +def get_data(): + res = requests.get("https://dummyjson.com/products?limit=10&select=id,title,brand,price") + data = res.json().get('products') + print(f"Loaded {len(data)} rows of JSON from the API") + return data diff --git a/src/save.py b/src/save.py new file mode 100644 index 0000000..ca9bf21 --- /dev/null +++ b/src/save.py @@ -0,0 +1,9 @@ +import csv + +def write_data(path, data): + with open(path, 'w') as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=data[0].keys()) + writer.writeheader() + for row in data: + writer.writerow(row) + print(f"Wrote {len(data)} rows to {path}") \ No newline at end of file