Skip to content

Commit

Permalink
Putting "people-skills-future" data in their own _data sub-directories
Browse files Browse the repository at this point in the history
  • Loading branch information
luke-strange committed Jun 26, 2024
1 parent c9586fb commit 9103fe4
Show file tree
Hide file tree
Showing 22 changed files with 53 additions and 70 deletions.
4 changes: 2 additions & 2 deletions pipelines/graduate-retention.pl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
my ($csvfile, $ofile, $ofile2, $total, $totals, $segment, $worksegment, $size, @rows, $fh, $regionlookup, $regions, $section, $tab, $line, @cols, @head, $header, $c, $r, $l, $region, $row, $yy, $progress, $workregion, $other, $years);

$csvfile = $basedir."../working/hesa/figure-16.csv";
$ofile = $basedir."../src/themes/people-skills-future/_data/graduate_retention.csv";
$ofile2 = $basedir."../src/themes/people-skills-future/_data/graduate_retention_by_date.csv";
$ofile = $basedir."../src/themes/people-skills-future/graduate-retention/_data/graduate_retention.csv";
$ofile2 = $basedir."../src/themes/people-skills-future/graduate-retention/_data/graduate_retention_by_date.csv";

$regionlookup = {
'East Midlands'=>'E12000004',
Expand Down
56 changes: 29 additions & 27 deletions pipelines/people/dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@ stages:
deps:
- path: ../../pipelines/util.py
hash: md5
md5: f878c2051409d61714f3381d28d87d3b
size: 2647
md5: 4339ffbc976b069992b6f61e482b567f
size: 3489
- path: https://github.com/economic-analytics/edd/raw/main/data/parquet/LMS.parquet
hash: md5
checksum: '"894a6d8eca95b80b9657ea9b446db15b6af00a588665acc1952c27e225039f04"'
size: 3186667
- path: vacancies.py
hash: md5
md5: 2935901f55fc5fe436601c8af9b08aca
size: 2258
md5: e7202141bcf8de5b8e50329eda6d0b45
size: 2278
outs:
- path: ../../src/themes/people-skills-future/_data/vacancies_by_sector.csv
- path: ../../src/themes/people-skills-future/vacancies/_data/vacancies_by_sector.csv
hash: md5
md5: 523269f391d223f6649ed17e86ca9d32
size: 13528
- path:
../../src/themes/people-skills-future/_data/vacancies_yearly_change_by_sector.csv
../../src/themes/people-skills-future/vacancies/_data/vacancies_yearly_change_by_sector.csv
hash: md5
md5: c1a52a77ff6ce9382225810604408a42
size: 1497
Expand All @@ -30,8 +30,8 @@ stages:
deps:
- path: ../../pipelines/util.py
hash: md5
md5: f878c2051409d61714f3381d28d87d3b
size: 2647
md5: 4339ffbc976b069992b6f61e482b567f
size: 3489
- path: ../../working/cs/cs-true-north.csv
hash: md5
md5: af140a62c1f7e4fffc6c5b797be97290
Expand All @@ -42,22 +42,23 @@ stages:
size: 45889578
- path: employment.py
hash: md5
md5: d643664ca73e36468e1eed9bd878ab0a
size: 2633
md5: 9c51d3c5f0d7e712b226543232afbf89
size: 2677
outs:
- path: ../../src/themes/people-skills-future/_data/economic_inactivity.csv
- path: ../../src/themes/people-skills-future/employment/_data/economic_inactivity.csv
hash: md5
md5: 7e4807e59f4b6f80201ca37f68a28319
size: 3566
- path: ../../src/themes/people-skills-future/_data/economic_inactivity_LAD.csv
- path:
../../src/themes/people-skills-future/employment/_data/economic_inactivity_LAD.csv
hash: md5
md5: 057e81fac703f9fb2688fc3d7a406413
size: 28180
- path: ../../src/themes/people-skills-future/_data/employment.csv
- path: ../../src/themes/people-skills-future/employment/_data/employment.csv
hash: md5
md5: 04fb8c28d58910c5beb477f72bc00a30
size: 3572
- path: ../../src/themes/people-skills-future/_data/employment_LAD.csv
- path: ../../src/themes/people-skills-future/employment/_data/employment_LAD.csv
hash: md5
md5: 4ef259d3c48d6feeeb67306a24b69ac4
size: 28269
Expand All @@ -66,18 +67,18 @@ stages:
deps:
- path: ../../pipelines/util.py
hash: md5
md5: f878c2051409d61714f3381d28d87d3b
size: 2647
md5: 4339ffbc976b069992b6f61e482b567f
size: 3489
- path: ../../working/cs/cs-true-north.csv
hash: md5
md5: af140a62c1f7e4fffc6c5b797be97290
size: 1480442
- path: qualifications.py
hash: md5
md5: 86b6a7b5705e575d5e88512616e3a4d4
size: 860
md5: 14f95f85aced30e4d81013d40e60271a
size: 875
outs:
- path: ../../src/themes/people-skills-future/_data/nvq_4plus.csv
- path: ../../src/themes/people-skills-future/qualifications/_data/nvq_4plus.csv
hash: md5
md5: 3990161ad18da32662be4cb08145d336
size: 727
Expand Down Expand Up @@ -108,14 +109,14 @@ stages:
size: 555225
- path: neet.py
hash: md5
md5: 7af288d7c31ed5c2fa41ddbd06ed0d70
size: 1072
md5: 9699c9674e43fa07fc405a1f5529c686
size: 1094
outs:
- path: ../../src/themes/people-skills-future/_data/neet.csv
- path: ../../src/themes/people-skills-future/employment/_data/neet.csv
hash: md5
md5: 9388a8604edf7c27235d8d27aeede084
size: 160
- path: ../../src/themes/people-skills-future/_data/risk_of_neet_by_la.csv
- path: ../../src/themes/people-skills-future/employment/_data/risk_of_neet_by_la.csv
hash: md5
md5: 121be03fe40ba8f11044310423afd4bb
size: 23551
Expand All @@ -128,14 +129,15 @@ stages:
size: 26728
- path: youth_employment.ipynb
hash: md5
md5: c7079b96e21ce303072f7b8760054459
size: 10505
md5: 0cda02984753f4301b9787be2d939e3b
size: 10527
outs:
- path: ../../src/themes/people-skills-future/_data/youth_employment_RGN.csv
- path: ../../src/themes/people-skills-future/employment/_data/youth_employment_RGN.csv
hash: md5
md5: cd7f7fca9117146f7178e4e076e459f9
size: 2666
- path: ../../src/themes/people-skills-future/_data/youth_employment_RGN_bar_chart.csv
- path:
../../src/themes/people-skills-future/employment/_data/youth_employment_RGN_bar_chart.csv
hash: md5
md5: 9f432360b6e848c5969954a0880248b6
size: 337
22 changes: 11 additions & 11 deletions pipelines/people/dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ stages:
- ${TOP}/pipelines/util.py
- https://github.com/economic-analytics/edd/raw/main/data/parquet/LMS.parquet
outs:
- ${TOP}/src/themes/people-skills-future/_data/vacancies_yearly_change_by_sector.csv:
- ${TOP}/src/themes/people-skills-future/vacancies/_data/vacancies_yearly_change_by_sector.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/vacancies_by_sector.csv:
- ${TOP}/src/themes/people-skills-future/vacancies/_data/vacancies_by_sector.csv:
cache: false
employment:
cmd: PYTHONPATH=${TOP} python employment.py
Expand All @@ -20,13 +20,13 @@ stages:
- ${TOP}/working/cs/cs-true-north.csv
- ${TOP}/working/cs/nomis-lad.csv
outs:
- ${TOP}/src/themes/people-skills-future/_data/employment.csv:
- ${TOP}/src/themes/people-skills-future/employment/_data/employment.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/economic_inactivity.csv:
- ${TOP}/src/themes/people-skills-future/employment/_data/economic_inactivity.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/employment_LAD.csv:
- ${TOP}/src/themes/people-skills-future/employment/_data/employment_LAD.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/economic_inactivity_LAD.csv:
- ${TOP}/src/themes/people-skills-future/employment/_data/economic_inactivity_LAD.csv:
cache: false
qualifications:
cmd: PYTHONPATH=${TOP} python qualifications.py
Expand All @@ -35,7 +35,7 @@ stages:
- ${TOP}/pipelines/util.py
- ${TOP}/working/cs/cs-true-north.csv
outs:
- ${TOP}/src/themes/people-skills-future/_data/nvq_4plus.csv:
- ${TOP}/src/themes/people-skills-future/qualifications/_data/nvq_4plus.csv:
cache: false
neet:
cmd: PYTHONPATH=${TOP} python neet.py
Expand All @@ -44,17 +44,17 @@ stages:
- https://raw.githubusercontent.com/open-innovations/yff-data-pipelines/main/data/processed/neet.csv
- https://raw.githubusercontent.com/open-innovations/yff-data-pipelines/main/data/processed/yff/neet-factors.csv
outs:
- ${TOP}/src/themes/people-skills-future/_data/neet.csv:
- ${TOP}/src/themes/people-skills-future/employment/_data/neet.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/risk_of_neet_by_la.csv:
- ${TOP}/src/themes/people-skills-future/employment/_data/risk_of_neet_by_la.csv:
cache: false
youth-employment:
cmd: papermill --no-progress-bar --no-report-mode youth_employment.ipynb ${TOP}/output/youth_employment.ipynb
deps:
- youth_employment.ipynb
- ${TOP}/working/cs/youth-unemployment-adjusted.csv
outs:
- ${TOP}/src/themes/people-skills-future/_data/youth_employment_RGN.csv:
- ${TOP}/src/themes/people-skills-future/employment/_data/youth_employment_RGN.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/youth_employment_RGN_bar_chart.csv:
- ${TOP}/src/themes/people-skills-future/employment/_data/youth_employment_RGN_bar_chart.csv:
cache: false
8 changes: 4 additions & 4 deletions pipelines/people/employment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def employment_LAD_hexmap():
# make na values blank strings
data = etl.convert(data, {'value': lambda v: v if v != 'NA' else ''})
data = etl.recast(data, key='geography_code', variablefield='date', valuefield='value', samplesize=10000)
etl_write(data, os.path.join(TOP, 'src/themes/people-skills-future/_data/employment_LAD.csv'))
etl_write(data, os.path.join(TOP, 'src/themes/people-skills-future/employment/_data/employment_LAD.csv'))

def economic_inactivity_LAD_hexmap():
data = etl_load(WDIR, 'cs/nomis-lad.csv')
Expand All @@ -17,7 +17,7 @@ def economic_inactivity_LAD_hexmap():
# make NA values blank strings
data = etl.convert(data, {'value': lambda v: v if v != 'NA' else ''})
data = etl.recast(data, key='geography_code', variablefield='date', valuefield='value', samplesize=10000)
etl_write(data, os.path.join(TOP, 'src/themes/people-skills-future/_data/economic_inactivity_LAD.csv'))
etl_write(data, os.path.join(TOP, 'src/themes/people-skills-future/employment/_data/economic_inactivity_LAD.csv'))

if __name__ == "__main__":
data = etl_load(WDIR, "cs/cs-true-north.csv")
Expand All @@ -33,7 +33,7 @@ def economic_inactivity_LAD_hexmap():

data = etl.addfield(data, 'decimal_date', decimal_date)

etl_write(data, os.path.join(TOP, 'src/themes/people-skills-future/_data/employment.csv'))
etl_write(data, os.path.join(TOP, 'src/themes/people-skills-future/employment/_data/employment.csv'))

ei_data = etl_load(WDIR, "cs/cs-true-north.csv")

Expand All @@ -48,7 +48,7 @@ def economic_inactivity_LAD_hexmap():

ei_data = etl.addfield(ei_data, 'decimal_date', decimal_date)

etl_write(ei_data, os.path.join(TOP, 'src/themes/people-skills-future/_data/economic_inactivity.csv'))
etl_write(ei_data, os.path.join(TOP, 'src/themes/people-skills-future/employment/_data/economic_inactivity.csv'))

employment_LAD_hexmap()

Expand Down
4 changes: 2 additions & 2 deletions pipelines/people/neet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ def total_neet_16_24():
con = duckdb.connect()
data = con.execute(f"SELECT date, sheet, age, measure, value FROM '{HEADLINE_URL}' WHERE sheet=='People - SA' AND age=='Aged 16-24' AND measure=='People who were NEET as a percentage of people in relevant population group'").fetch_df()
data = data.tail(1).set_index('date')
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/neet.csv'))
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/employment/_data/neet.csv'))
return

def neet_by_local_authority():
con = duckdb.connect()
data = con.execute(f"SELECT * FROM '{LOCAL_AUTHORITY_URL}' WHERE variable=='Total Score'").fetchdf()
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/risk_of_neet_by_la.csv'), index=False)
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/employment/_data/risk_of_neet_by_la.csv'), index=False)

if __name__ == "__main__":
total_neet_16_24()
Expand Down
2 changes: 1 addition & 1 deletion pipelines/people/qualifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def nvq():
# Convert the iso dates to unix
data = etl.addfield(data, 'unix_timestamp', iso_to_unix)
# Write to file
etl_write(data, os.path.join(SRC_DIR, 'themes/people-skills-future/_data/nvq_4plus.csv'))
etl_write(data, os.path.join(SRC_DIR, 'themes/people-skills-future/qualifications/_data/nvq_4plus.csv'))
return

if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions pipelines/people/vacancies.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def vacancies_by_sector():
# limit the time series to last 10 years -> 12months x 10 years = 120 values.
data = data.tail(120)

data.to_csv(os.path.join(SRC_DIR,'themes/people-skills-future/_data/vacancies_by_sector.csv'))
data.to_csv(os.path.join(SRC_DIR,'themes/people-skills-future/vacancies/_data/vacancies_by_sector.csv'))
return data

def yearly_change_by_sector(data):
Expand All @@ -41,7 +41,7 @@ def yearly_change_by_sector(data):
data.set_index('decimal_date', inplace=True)
data = data.T
data.index.rename('sector', inplace=True)
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/vacancies_yearly_change_by_sector.csv'))
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/vacancies/_data/vacancies_yearly_change_by_sector.csv'))

return

Expand Down
4 changes: 2 additions & 2 deletions pipelines/people/youth_employment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
"def linechart(data):\n",
" data = data.pivot(index=['date', 'decimal_date'], columns='geography_code', values='total_who_want_to_work')\n",
" data = data.mul(100).round(1)\n",
" data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/youth_employment_RGN.csv'))\n",
" data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/employment/_data/youth_employment_RGN.csv'))\n",
" return data\n",
"linechart(data)"
]
Expand Down Expand Up @@ -297,7 +297,7 @@
" data = data[data['decimal_date'] == max(data['decimal_date'])].drop(columns=['decimal_date', 'unix'])\n",
" data = data.set_index('geography_code', drop=True)\n",
" data[['unemployed_percent', 'wants_a_job_percent', 'total_who_want_to_work']] = data[['unemployed_percent','wants_a_job_percent','total_who_want_to_work']].mul(100).round(1)\n",
" data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/youth_employment_RGN_bar_chart.csv'))\n",
" data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/employment/_data/youth_employment_RGN_bar_chart.csv'))\n",
" return data\n",
"data = bar_chart(data)\n",
"data"
Expand Down
Loading

0 comments on commit 9103fe4

Please sign in to comment.