Skip to content

Commit

Permalink
Vacancy (thousands) by sector. #2
Browse files Browse the repository at this point in the history
  • Loading branch information
luke-strange committed May 24, 2024
1 parent de81ad0 commit 892b6e8
Show file tree
Hide file tree
Showing 4 changed files with 4,982 additions and 5 deletions.
12 changes: 10 additions & 2 deletions pipelines/people/dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,23 @@ stages:
hash: md5
md5: f7ada6a8f6bf1c292871898fa47fed07
size: 497028
- path: https://github.com/economic-analytics/edd/raw/main/data/parquet/LMS.parquet
hash: md5
checksum: '"60f870b20456d96fbd047a3c7c25a0f829e88b8a45a24a13fcafa5157c96ed9d"'
size: 3113603
- path: vacancies.py
hash: md5
md5: 929ba58398452214227ecfe5a565d3ef
size: 719
md5: 0c5b04e2629bb46cdbdef7e6ca6b66e3
size: 1428
outs:
- path: ../../src/themes/people-skills-future/_data/growth_by_sector.csv
hash: md5
md5: d299bf8d940b93bb089c0cd7ed979cfe
size: 58294
- path: ../../src/themes/people-skills-future/_data/vacancies_by_sector.csv
hash: md5
md5: 1371114d3de595bca9e0effe4f2a90aa
size: 451461
employment:
cmd: PYTHONPATH=../.. python employment.py
deps:
Expand Down
3 changes: 3 additions & 0 deletions pipelines/people/dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@ stages:
- vacancies.py
- ${TOP}/pipelines/util.py
- ${TOP}/working/cs/vacancies_by_sector_percentage_change_on_previous.csv
- https://github.com/economic-analytics/edd/raw/main/data/parquet/LMS.parquet
outs:
- ${TOP}/src/themes/people-skills-future/_data/growth_by_sector.csv:
cache: false
- ${TOP}/src/themes/people-skills-future/_data/vacancies_by_sector.csv:
cache: false
employment:
cmd: PYTHONPATH=${TOP} python employment.py
deps:
Expand Down
21 changes: 18 additions & 3 deletions pipelines/people/vacancies.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pipelines.util import *

if __name__ == "__main__":
import duckdb
def vacancies_growth():

data = etl_load(WDIR, 'cs/vacancies_by_sector_percentage_change_on_previous.csv')

Expand All @@ -16,5 +16,20 @@
data = etl.transpose(data).rename({"dates_date": "sector"})

etl_write(data, os.path.join(SRC_DIR, 'themes/people-skills-future/_data/growth_by_sector.csv'))


return

def vacancies_by_sector():
    """Export the monthly LMS series for codes JP9H..JP9Y to a CSV file.

    Reads the remote LMS parquet file with DuckDB, keeping only rows whose
    "dates.freq" is 'm', then filters them to the variable codes running
    from JP9H to JP9Y inclusive (only the last character varies). The
    result is written to
    ``themes/people-skills-future/_data/vacancies_by_sector.csv`` under
    ``SRC_DIR``.

    Returns:
        None. The only effect is the CSV file written to disk.
    """
    URL = "https://github.com/economic-analytics/edd/raw/main/data/parquet/LMS.parquet"
    start_code = 'JP9H'
    end_code = 'JP9Y'

    # Build every code between start_code and end_code by stepping the final
    # character through the alphabet; the shared prefix is left untouched.
    prefix = start_code[:-1]
    first_ord = ord(start_code[-1])
    last_ord = ord(end_code[-1])
    codes = [prefix + chr(char) for char in range(first_ord, last_ord + 1)]

    query = f"SELECT \"dates.date\" AS date, \"variable.name\", \"variable.unit\", \"variable.code\", value FROM '{URL}' WHERE \"dates.freq\"=='m'"

    # Use the connection as a context manager so it is closed even when the
    # remote read or the query raises; previously it was never closed.
    with duckdb.connect() as con:
        data = con.execute(query).fetchdf()

    data = data[data['variable.code'].isin(codes)]

    # NOTE(review): to_csv is called with its defaults, so the DataFrame index
    # (row positions from the unfiltered query result) is written as the first
    # column. Confirm downstream consumers expect that before changing it.
    data.to_csv(os.path.join(SRC_DIR, 'themes/people-skills-future/_data/vacancies_by_sector.csv'))

if __name__ == "__main__":
    # Run each export in turn, then report completion on stdout.
    for export_step in (vacancies_growth, vacancies_by_sector):
        export_step()
    print('Finished vacancies')
Loading

0 comments on commit 892b6e8

Please sign in to comment.