Skip to content

Commit

Permalink
Update GVA
Browse files Browse the repository at this point in the history
  • Loading branch information
luke-strange committed Jun 26, 2024
1 parent 702170a commit abae374
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
10 changes: 5 additions & 5 deletions pipelines/sustainable/dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ stages:
deps:
- path: ../../pipelines/util.py
hash: md5
md5: 539e317fb1ff73675eba0177fe0ff274
size: 2174
md5: 4339ffbc976b069992b6f61e482b567f
size: 3489
- path: gva.py
hash: md5
md5: 0fab1bda58bb99b15f1f1804b953e9bf
size: 1209
md5: 84fa23b347b950328a05de2f33cdf263
size: 981
- path: https://github.com/economic-analytics/edd/raw/main/data/parquet/RGVA_LAD.parquet
hash: md5
checksum: '"c1be64ed511e603676b92efb34eb6f06a7543ca07cd563f7e52cee03c7cec9fa"'
Expand All @@ -22,5 +22,5 @@ stages:
size: 6215
- path: ../../src/themes/sustainable-growth/gva/index.vto
hash: md5
md5: 7f9e4d5a23e34fa6c3635d57321159dc
md5: 868d4a98e1aa9660c82f20eb75b82733
size: 1906
23 changes: 13 additions & 10 deletions pipelines/sustainable/gva.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,25 @@
from pipelines.util import *

# Remote parquet file (RGVA_LAD in the economic-analytics/edd repository)
# that the query below reads from.
URL = "https://github.com/economic-analytics/edd/raw/main/data/parquet/RGVA_LAD.parquet"

# SQL text: pick the date, variable, geography, industry and value columns
# from the remote parquet file, keeping only the 'All industries' rows.
query = (
    'SELECT "dates.date" AS date, "variable.name", "geography.code", "industry.name", value '
    f"FROM '{URL}' "
    "WHERE \"industry.name\"=='All industries';"
)

def gva_by_local_authority():
    """Write the latest 'GVA Current Prices £m' values per geography code to CSV.

    Loads the rows selected by the module-level ``query``, keeps only the
    'GVA Current Prices £m' variable, narrows the frame to the most recent
    date, pivots it to one row per ``geography.code`` and writes the result
    to ``themes/sustainable-growth/gva/_data/gva_lad.csv`` under ``SRC_DIR``.

    Returns:
        None. The CSV file is the only output.
    """
    # Load once through the shared helper. The earlier body also opened its
    # own duckdb connection (never closed) and re-ran the identical SQL,
    # throwing this result away.
    # NOTE(review): assumes remote_parquet_as_dataframe(query) yields the same
    # frame as duckdb's execute(query).fetchdf() -- confirm in pipelines/util.py.
    data = remote_parquet_as_dataframe(query)

    # filter the frame
    data = data[data['variable.name'] == 'GVA Current Prices £m']

    # get the most recent date
    # NOTE(review): assumes most_recent_date keeps the rows whose 'date' equals
    # the column maximum, replacing the inline max(data['date']) comparison --
    # confirm against the helper's definition.
    data = most_recent_date(data, 'date')

    # pivot the frame: one row per geography code, one column per variable name
    data = data.pivot(columns='variable.name', values='value', index='geography.code')

    # write to csv
    data.to_csv(os.path.join(SRC_DIR, 'themes/sustainable-growth/gva/_data/gva_lad.csv'))

if __name__ == "__main__":
    # Rebuild the CSV before touching the page that presents it.
    gva_by_local_authority()
    # Called once: the second, back-to-back call with identical arguments
    # was redundant.
    # NOTE(review): time_updated presumably records the update time under the
    # 'nicetheme:' key of index.vto -- confirm in pipelines/util.py.
    time_updated(os.path.join(SRC_DIR, 'themes/sustainable-growth/gva/index.vto'), 'nicetheme:')
    edd_last_updated_next_updated(id='RGVA_LAD')

0 comments on commit abae374

Please sign in to comment.