Vacancy (thousands) by sector. #2

open-innovations · May 24, 2024 · 892b6e8
1 parent de81ad0
commit 892b6e8
Show file tree

Hide file tree

Showing 4 changed files with 4,982 additions and 5 deletions.
diff --git a/pipelines/people/dvc.lock b/pipelines/people/dvc.lock
@@ -11,15 +11,23 @@ stages:
       hash: md5
       md5: f7ada6a8f6bf1c292871898fa47fed07
       size: 497028
+    - path: https://github.com/economic-analytics/edd/raw/main/data/parquet/LMS.parquet
+      hash: md5
+      checksum: '"60f870b20456d96fbd047a3c7c25a0f829e88b8a45a24a13fcafa5157c96ed9d"'
+      size: 3113603
     - path: vacancies.py
       hash: md5
-      md5: 929ba58398452214227ecfe5a565d3ef
-      size: 719
+      md5: 0c5b04e2629bb46cdbdef7e6ca6b66e3
+      size: 1428
     outs:
     - path: ../../src/themes/people-skills-future/_data/growth_by_sector.csv
       hash: md5
       md5: d299bf8d940b93bb089c0cd7ed979cfe
       size: 58294
+    - path: ../../src/themes/people-skills-future/_data/vacancies_by_sector.csv
+      hash: md5
+      md5: 1371114d3de595bca9e0effe4f2a90aa
+      size: 451461
   employment:
     cmd: PYTHONPATH=../.. python employment.py
     deps:

diff --git a/pipelines/people/dvc.yaml b/pipelines/people/dvc.yaml
@@ -7,9 +7,12 @@ stages:
       - vacancies.py
       - ${TOP}/pipelines/util.py
       - ${TOP}/working/cs/vacancies_by_sector_percentage_change_on_previous.csv
+      - https://github.com/economic-analytics/edd/raw/main/data/parquet/LMS.parquet
     outs:
       - ${TOP}/src/themes/people-skills-future/_data/growth_by_sector.csv:
           cache: false
+      - ${TOP}/src/themes/people-skills-future/_data/vacancies_by_sector.csv:
+          cache: false
   employment:
     cmd: PYTHONPATH=${TOP} python employment.py
     deps:

diff --git a/pipelines/people/vacancies.py b/pipelines/people/vacancies.py
@@ -1,6 +1,6 @@
 from pipelines.util import *
-
-if __name__ == "__main__":
+import duckdb
+def vacancies_growth():
 
     data = etl_load(WDIR, 'cs/vacancies_by_sector_percentage_change_on_previous.csv')
 
@@ -16,5 +16,20 @@
     data = etl.transpose(data).rename({"dates_date": "sector"})
 
     etl_write(data, os.path.join(SRC_DIR, 'themes/people-skills-future/_data/growth_by_sector.csv'))
-
+
+    return
+
+def vacancies_by_sector():
+    """Write LMS rows with dates.freq 'm' (presumably monthly) and codes JP9H..JP9Y to vacancies_by_sector.csv."""
+    URL = "https://github.com/economic-analytics/edd/raw/main/data/parquet/LMS.parquet"
+    # The wanted series codes share the stem 'JP9' and differ only in a final letter running H..Y inclusive.
+    codes = ['JP9' + chr(char) for char in range(ord('H'), ord('Y') + 1)]
+    with duckdb.connect() as con:  # context manager closes the connection, which was previously left open
+        data = con.execute(f"SELECT \"dates.date\" AS date, \"variable.name\", \"variable.unit\", \"variable.code\", value  FROM '{URL}' WHERE \"dates.freq\"=='m'").fetchdf()
+    # Filtered in pandas rather than SQL, so the CSV's index column keeps the pre-filter row labels.
+    data[data['variable.code'].isin(codes)].to_csv(os.path.join(SRC_DIR,'themes/people-skills-future/_data/vacancies_by_sector.csv'))
+
+if __name__ == "__main__":  # script entry point; nothing below runs on import
+    vacancies_growth()  # writes growth_by_sector.csv
+    vacancies_by_sector()  # writes vacancies_by_sector.csv
     print('Finished vacancies')