From fb6b0c21b1c6d3f37fde1def993ff91d42162e3a Mon Sep 17 00:00:00 2001 From: luke-strange <92686634+luke-strange@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:46:06 +0100 Subject: [PATCH] Add dvc tracking for youth employment --- output/.gitignore | 1 + pipelines/people/dvc.lock | 20 ++++++++++++++++++++ pipelines/people/dvc.yaml | 12 +++++++++++- pipelines/people/youth_employment.ipynb | 11 ++++++++--- 4 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 output/.gitignore diff --git a/output/.gitignore b/output/.gitignore new file mode 100644 index 0000000..fa65608 --- /dev/null +++ b/output/.gitignore @@ -0,0 +1 @@ +*.ipynb diff --git a/pipelines/people/dvc.lock b/pipelines/people/dvc.lock index 77de1e4..2029227 100644 --- a/pipelines/people/dvc.lock +++ b/pipelines/people/dvc.lock @@ -119,3 +119,23 @@ stages: hash: md5 md5: 121be03fe40ba8f11044310423afd4bb size: 23551 + youth-employment: + cmd: papermill --no-progress-bar --no-report-mode youth_employment.ipynb ../../output/youth_employment.ipynb + deps: + - path: ../../working/cs/youth-unemployment-adjusted.csv + hash: md5 + md5: 666b3a41e5b413f8ab738ccdd1cb3d2a + size: 26728 + - path: youth_employment.ipynb + hash: md5 + md5: c7079b96e21ce303072f7b8760054459 + size: 10505 + outs: + - path: ../../src/themes/people-skills-future/_data/youth_employment_RGN.csv + hash: md5 + md5: cd7f7fca9117146f7178e4e076e459f9 + size: 2666 + - path: ../../src/themes/people-skills-future/_data/youth_employment_RGN_bar_chart.csv + hash: md5 + md5: 9f432360b6e848c5969954a0880248b6 + size: 337 diff --git a/pipelines/people/dvc.yaml b/pipelines/people/dvc.yaml index dacde6f..1137730 100644 --- a/pipelines/people/dvc.yaml +++ b/pipelines/people/dvc.yaml @@ -47,4 +47,14 @@ stages: - ${TOP}/src/themes/people-skills-future/_data/neet.csv: cache: false - ${TOP}/src/themes/people-skills-future/_data/risk_of_neet_by_la.csv: - cache: false \ No newline at end of file + cache: false + youth-employment: + cmd: papermill 
--no-progress-bar --no-report-mode youth_employment.ipynb ${TOP}/output/youth_employment.ipynb + deps: + - youth_employment.ipynb + - ${TOP}/working/cs/youth-unemployment-adjusted.csv + outs: + - ${TOP}/src/themes/people-skills-future/_data/youth_employment_RGN.csv: + cache: false + - ${TOP}/src/themes/people-skills-future/_data/youth_employment_RGN_bar_chart.csv: + cache: false diff --git a/pipelines/people/youth_employment.ipynb b/pipelines/people/youth_employment.ipynb index e30ce22..6ffbd4b 100644 --- a/pipelines/people/youth_employment.ipynb +++ b/pipelines/people/youth_employment.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To run this notebook locally, make sure the os.chdir('../..') line below is uncommented (remove any leading '#')." + ] + }, { "cell_type": "code", "execution_count": 1, @@ -7,7 +14,7 @@ "outputs": [], "source": [ "import os\n", - "os.chdir('../..')\n", + "os.chdir('../..') \n", "from pipelines.util import *" ] }, @@ -18,8 +25,6 @@ "outputs": [], "source": [ "data = pd.read_csv('working/cs/youth-unemployment-adjusted.csv')\n", - "# data['unix'] = pd.to_datetime(data['date'], format=f'%Y-%m-%d').astype(int).div(10**9).astype(int)\n", - "# data['decimal_date'] = data['unix'].div((86400*365.25)).add(1970).round(2)\n", "data = add_decimal_date_to_dataframe(data, datename='date')" ] },