
Commit

Update employment page
luke-strange committed Jun 11, 2024
1 parent 97859ee commit 246fd8a
Showing 12 changed files with 948 additions and 54 deletions.
31 changes: 22 additions & 9 deletions pipelines/people/dvc.lock
@@ -33,21 +33,25 @@ stages:
deps:
- path: ../../pipelines/util.py
hash: md5
md5: 36ef11ec36aad93bd1f7b596d8f635ce
size: 1301
md5: 17aacff551ae438adc3bd465dbecdd05
size: 1420
- path: ../../working/cs/cs-true-north.csv
hash: md5
md5: af140a62c1f7e4fffc6c5b797be97290
size: 1480442
- path: employment.py
hash: md5
md5: 4f9a0b99b9d722b105eba138f5989adc
size: 640
md5: 6ac3d1ef02e1a99afa6c4800d3fd1046
size: 1301
outs:
- path: ../../src/themes/people-skills-future/_data/unemployment.csv
- path: ../../src/themes/people-skills-future/_data/economic_inactivity.csv
hash: md5
md5: 7e4807e59f4b6f80201ca37f68a28319
size: 3566
- path: ../../src/themes/people-skills-future/_data/employment.csv
hash: md5
md5: b60d9a2d17a38edc13552071bf2187c0
size: 2943
md5: 04fb8c28d58910c5beb477f72bc00a30
size: 3572
qualifications:
cmd: PYTHONPATH=../.. python qualifications.py
deps:
@@ -88,12 +92,21 @@ stages:
hash: md5
checksum: '"14ccdbb3a4299537b8eb04d5d83a1ac0f8eed61d0847d29812fa42b3b7c0b694"'
size: 367480
- path: https://raw.githubusercontent.com/open-innovations/yff-data-pipelines/main/data/processed/yff/neet-factors.csv
hash: md5
checksum: '"7334c58ffd7f27be60a5f4bac77d8b7fa378686479246ecde1a4bc63235850de"'
size: 555225
- path: neet.py
hash: md5
md5: 46288d019d453a255f52a168a00e1c54
size: 1549
md5: 8c17c33ee343cf2a7bb71c0d8f96728a
size: 1106
outs:
- path: ../../src/themes/people-skills-future/_data/neet.csv
hash: md5
md5: 9388a8604edf7c27235d8d27aeede084
size: 160
- path: ../../src/themes/people-skills-future/_data/risk_of_neet_by_la.csv
hash: md5
md5: 121be03fe40ba8f11044310423afd4bb
size: 23551
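
For context on what dvc.lock is recording here: each dependency and output is pinned by an md5 checksum and a byte size, so DVC can tell when a stage needs to re-run. A minimal sketch of computing that kind of fingerprint for a local file (illustrative only, not DVC's internal code path, and it does not cover the ETag checksums used for the https deps; the path is a placeholder):

import hashlib
import os

def fingerprint(path):
    # md5 of the file contents plus its size on disk - the two fields
    # dvc.lock stores for each local path above
    with open(path, 'rb') as f:
        digest = hashlib.md5(f.read()).hexdigest()
    return {'md5': digest, 'size': os.path.getsize(path)}

print(fingerprint('employment.py'))

Run from pipelines/people against the updated script, this should reproduce the md5/size pair shown for employment.py above.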
7 changes: 6 additions & 1 deletion pipelines/people/dvc.yaml
@@ -20,7 +20,9 @@ stages:
- ${TOP}/pipelines/util.py
- ${TOP}/working/cs/cs-true-north.csv
outs:
- ${TOP}/src/themes/people-skills-future/_data/unemployment.csv:
- ${TOP}/src/themes/people-skills-future/_data/employment.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/economic_inactivity.csv:
cache: false
qualifications:
cmd: PYTHONPATH=${TOP} python qualifications.py
@@ -36,6 +38,9 @@
deps:
- neet.py
- https://raw.githubusercontent.com/open-innovations/yff-data-pipelines/main/data/processed/neet.csv
- https://raw.githubusercontent.com/open-innovations/yff-data-pipelines/main/data/processed/yff/neet-factors.csv
outs:
- ${TOP}/src/themes/people-skills-future/_data/neet.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/risk_of_neet_by_la.csv:
cache: false
19 changes: 17 additions & 2 deletions pipelines/people/employment.py
@@ -12,6 +12,21 @@
# convert the iso dates to unix
data = etl.addfield(data, 'unix_timestamp', iso_to_unix)

etl_write(data, os.path.join(TOP, 'src/themes/people-skills-future/_data/unemployment.csv'))
data = etl.addfield(data, 'decimal_date', decimal_date)

print("Got unemployment data")
etl_write(data, os.path.join(TOP, 'src/themes/people-skills-future/_data/employment.csv'))

ei_data = etl_load(WDIR, "cs/cs-true-north.csv")

ei_data = etl.select(ei_data, "{variable_name} == '% who are economically inactive - aged 16-64' and {measures_name} == 'Variable' ")

ei_data = etl.cut(ei_data, 'date', 'geography_code', 'value')

ei_data = etl.recast(ei_data, key='date', variablefield='geography_code', valuefield='value')

# convert the iso dates to unix
ei_data = etl.addfield(ei_data, 'unix_timestamp', iso_to_unix)

ei_data = etl.addfield(ei_data, 'decimal_date', decimal_date)

etl_write(ei_data, os.path.join(TOP, 'src/themes/people-skills-future/_data/economic_inactivity.csv'))
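
The new economic-inactivity block above follows the same petl pattern as the employment series: filter to the variable of interest, cut down to date / geography_code / value, then recast from long to wide so each geography code becomes a column. A self-contained sketch of that reshape on made-up rows (the values and geography codes are illustrative, not taken from cs-true-north.csv):

import petl as etl

# Hypothetical long-format rows standing in for working/cs/cs-true-north.csv
table = [
    ['date', 'geography_code', 'value', 'variable_name', 'measures_name'],
    ['2024-01-01', 'E92000001', 21.5, '% who are economically inactive - aged 16-64', 'Variable'],
    ['2024-01-01', 'K02000001', 21.9, '% who are economically inactive - aged 16-64', 'Variable'],
    ['2024-02-01', 'E92000001', 21.4, '% who are economically inactive - aged 16-64', 'Variable'],
    ['2024-02-01', 'K02000001', 21.8, '% who are economically inactive - aged 16-64', 'Variable'],
]

ei = etl.select(table, "{variable_name} == '% who are economically inactive - aged 16-64' and {measures_name} == 'Variable'")
ei = etl.cut(ei, 'date', 'geography_code', 'value')
# recast pivots geography codes into columns, giving one row per date
ei = etl.recast(ei, key='date', variablefield='geography_code', valuefield='value')
print(etl.look(ei))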
20 changes: 7 additions & 13 deletions pipelines/people/neet.py
@@ -2,27 +2,21 @@
import duckdb
import pandas as pd

URL = "https://raw.githubusercontent.com/open-innovations/yff-data-pipelines/main/data/processed/neet.csv"
HEADLINE_URL = "https://raw.githubusercontent.com/open-innovations/yff-data-pipelines/main/data/processed/neet.csv"

LOCAL_AUTHORITY_URL = "https://raw.githubusercontent.com/open-innovations/yff-data-pipelines/main/data/processed/yff/neet-factors.csv"

def total_neet_16_24():
con = duckdb.connect()
data = con.execute(f"SELECT date, sheet, age, measure, value FROM '{URL}' WHERE sheet=='People - SA' AND age=='Aged 16-24' AND measure=='People who were NEET as a percentage of people in relevant population group'").fetch_df()
data = con.execute(f"SELECT date, sheet, age, measure, value FROM '{HEADLINE_URL}' WHERE sheet=='People - SA' AND age=='Aged 16-24' AND measure=='People who were NEET as a percentage of people in relevant population group'").fetch_df()
data = data.tail(1).set_index('date')
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/neet.csv'))
return

def neet_by_local_authority():
data = pd.read_csv(os.path.join(WDIR, 'neet/ud_neet_characteristics.csv'))
# combine all codes into one column. Uses LA code if exists, the region, then country.
data['geography_code'] = data['new_la_code'].combine_first(data['region_code']).combine_first(data['country_code'])
# data = data[data['new_la_code'].notnull()]

#drop un-used columns
data.drop(columns=['time_identifier', 'country_name', 'country_code', 'region_code', 'region_name', 'old_la_code', 'geographic_level'], inplace=True)
data = data[(data['Age']=='16-17') & (data['Characteristic']=='Total') & (data['time_period']==max(data['time_period']))]
data.set_index('time_period', inplace=True)
data.index.rename('date', inplace=True)
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/most_recent_neet_by_la.csv'))
con = duckdb.connect()
data = con.execute(f"SELECT * FROM '{LOCAL_AUTHORITY_URL}' WHERE variable=='Total Score'").fetchdf()
data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/risk_of_neet_by_la.csv'), index=False)

if __name__ == "__main__":
total_neet_16_24()
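
The rewritten neet_by_local_authority mirrors total_neet_16_24: it queries a remote CSV directly with DuckDB instead of reading a locally staged file. A standalone sketch of that pattern, writing to a local filename rather than the site's _data directory (the output path here is a placeholder):

import duckdb

LOCAL_AUTHORITY_URL = (
    "https://raw.githubusercontent.com/open-innovations/"
    "yff-data-pipelines/main/data/processed/yff/neet-factors.csv"
)

con = duckdb.connect()
# DuckDB can query the CSV straight over HTTPS; filter to the overall risk score
data = con.execute(
    f"SELECT * FROM '{LOCAL_AUTHORITY_URL}' WHERE variable = 'Total Score'"
).fetch_df()
data.to_csv('risk_of_neet_by_la.csv', index=False)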
4 changes: 4 additions & 0 deletions pipelines/util.py
@@ -28,6 +28,10 @@ def iso_to_unix(row):
dt = datetime.fromisoformat(iso_date)
return int(dt.timestamp())

def decimal_date(row):
timestamp = row['unix_timestamp']
return round((timestamp / (86400*365.25)) + 1970, 2)

def slugify_column_names(headers):
return [slugify(header, separator='_') for header in headers]

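The decimal_date helper added to util.py turns the Unix timestamp produced by iso_to_unix into an approximate decimal year (seconds divided by 86400*365.25 seconds per average year, offset by 1970). A quick sanity check of that arithmetic, inlining the formula for an illustrative date:

from datetime import datetime, timezone

# 2024-06-10T00:00:00Z as a Unix timestamp
ts = int(datetime(2024, 6, 10, tzinfo=timezone.utc).timestamp())  # 1717977600
decimal_year = round((ts / (86400 * 365.25)) + 1970, 2)
print(decimal_year)  # 2024.44 - roughly 44% of the way through 2024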
