-
Notifications
You must be signed in to change notification settings - Fork 27
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Capital Gains Tax (and imputations) (#814)
* Add initial capital gains progress imputation * Add gains data and documentation * Format * Versioning * Update dockerfile * Format
- Loading branch information
1 parent
02b718c
commit b6eefc7
Showing
18 changed files
with
483 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
- bump: minor | ||
changes: | ||
added: | ||
- Initial version of capital gains imputations and logic. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[theme] | ||
primaryColor="#2C6496" | ||
font="serif" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
FROM python:3.9 | ||
RUN pip install policyengine-uk streamlit ipython | ||
WORKDIR /app | ||
COPY . /app | ||
EXPOSE 8501 | ||
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health | ||
ENTRYPOINT ["streamlit", "run", "Home.py", "--server.port=8501", "--server.address=0.0.0.0"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import streamlit as st | ||
|
||
# Shared CSS injected into every documentation page. It hides Streamlit's
# default header/footer/menu and the Plotly mode bar, trims the container
# padding and sets the page font.
STYLE = """
<style>
header {
    display: none !important;
}
footer {
    display: none !important;
}
section > div.block-container {
    padding-top: 0px !important;
    padding-bottom: 0px !important;
}
html, body, [class*="css"] {
    font-family: "Roboto Serif", serif !important;
    font-weight: 500;
}
[data-baseweb="slider"] {
    padding-left: 10px !important;
}
#MainMenu {
    visibility: hidden;
}
footer {
    visibility: hidden;
}
.modebar{
    display: none !important;
}
</style>
"""
# Only render the landing page when this file is the script Streamlit is
# running. Other pages do `from Home import STYLE`; without this guard that
# import would execute the calls below and draw the Home title and blurb
# inside the importing page.
if __name__ == "__main__":
    st.write(STYLE, unsafe_allow_html=True)

    st.title("PolicyEngine UK documentation")

    st.markdown(
        """
This is the documentation for PolicyEngine UK, an open-source microsimulation model of the UK tax and benefit system.
"""
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
import streamlit as st | ||
import pandas as pd | ||
from microdf import MicroDataFrame | ||
import numpy as np | ||
import plotly.express as px | ||
from policyengine_core.charts import format_fig, BLUE, BLUE_LIGHT | ||
from Home import STYLE | ||
from policyengine_uk.data.storage import STORAGE_FOLDER | ||
|
||
# st.set_page_config(layout="wide") | ||
|
||
# Page header: shared stylesheet, title and one-line summary.
PAGE_SUMMARY = """This page documents PolicyEngine's in-progress capital gains imputations in the PolicyEngine UK microsimulation model."""

# Methodology blurb shown under the "Method" subheader.
METHOD_TEXT = """Our input data consists of: PolicyEngine's Enhanced FRS (incorporating WAS, LCFS, SPI and ONS/OBR summary data), and joint capital gains-taxable income data from [CAGE working paper no. 465, *Capital Gains and UK Inequality* (Arun Advani, Andy Summers)](https://warwick.ac.uk/fac/soc/economics/research/centres/cage/manage/publications/wp465.2020.pdf).
This data includes p05, p10, p25, p50, p75, p90, and p95 percentiles of capital gains (given gains != 0) as well as the percentage with gains for each of over 60 income bands. We fit a spline to each income band's percentiles, and use these splines to impute capital gains for each individual in the microsimulation model as an initial approach.
The below figure is interactive and shows the fitted spline for each income band.
"""

METHOD_CAVEAT = "**Caveat:** so far, we've only used income bands up to over £128,000, so won't capture the very highest earners."

st.write(STYLE, unsafe_allow_html=True)
st.title("Capital Gains Tax")
st.markdown(PAGE_SUMMARY)

st.subheader("Method")
st.markdown(METHOD_TEXT)
st.warning(METHOD_CAVEAT)
|
||
# Load the capital gains distribution table: one row per total-income band.
capital_gains = pd.read_csv(
    STORAGE_FOLDER
    / "imputations"
    / "capital_gains_distribution_advani_summers.csv.gz"
)
# A band's upper bound is the next row's lower bound; the last band is
# open-ended. NOTE(review): this assumes the rows are sorted by ascending
# minimum_total_income - confirm against the data file.
capital_gains["maximum_total_income"] = (
    capital_gains.minimum_total_income.shift(-1).fillna(np.inf)
)
# Fit a spline to each income band's percentiles
from scipy.interpolate import UnivariateSpline

# Quantile levels of the p05..p95 columns, in matching order.
_PERCENTILE_LEVELS = [0.05, 0.1, 0.25, 0.5, 0.75, 0.90, 0.95]
_PERCENTILE_COLUMNS = ["p05", "p10", "p25", "p50", "p75", "p90", "p95"]

# Maps each band's lower income bound to its fitted quantile function.
splines = {}

for _, row in capital_gains.iterrows():
    splines[row.minimum_total_income] = UnivariateSpline(
        _PERCENTILE_LEVELS,
        [row[column] for column in _PERCENTILE_COLUMNS],
        # Degree 1 matches the spline degree used by the production
        # imputation script, so the chart on this page shows the same
        # curves that are actually sampled from.
        k=1,
    )
|
||
# Raw input table, collapsed by default.
with st.expander("Capital gains-income joint distribution input data"):
    st.dataframe(capital_gains)

# Interactive view of one income band's fitted quantile curve.
with st.expander("Capital gains-income joint distribution fitted splines"):
    income_band = st.select_slider(
        "Income band",
        capital_gains.minimum_total_income,
        format_func=lambda x: f"£{x:,.0f}",
    )

    # Evaluate the selected band's spline on an even grid over [0, 1].
    quantile_grid = np.linspace(0, 1, 100)
    fig = px.line(x=quantile_grid, y=splines[income_band](quantile_grid))
    # Use one shared y-range for every band so curves stay comparable as
    # the slider moves.
    fig.update_layout(
        title="Percentiles of capital gains",
        yaxis_title="Capital gains",
        xaxis_title="Percentile",
        yaxis_tickformat=",.0f",
        yaxis_tickprefix="£",
        xaxis_tickformat=".0%",
        yaxis_range=[capital_gains.p05.min(), capital_gains.p95.max()],
    )
    fig.update_traces(line=dict(color=BLUE))

    st.plotly_chart(format_fig(fig), use_container_width=True)
|
||
from tqdm import tqdm
from policyengine_uk.system import system

# Calibration parameter looked up by date below.
cgt_revenue = system.parameters.calibration.programs.capital_gains.total

# Lower income bound of every band, in the order the splines were fitted.
lower_income_bounds = list(splines)

# Scale factor applied to imputed amounts: the ratio of the parameter's
# 2023 value to its 2017 value.
_revenue_2023 = cgt_revenue("2023-01-01")
_revenue_2017 = cgt_revenue("2017-01-01")
uprating_from_2017 = _revenue_2023 / _revenue_2017
|
||
|
||
def impute_capital_gains(total_income: float) -> float:
    """Randomly draw a capital gains amount for a given total income.

    The income band containing ``total_income`` is looked up in the
    module-level ``capital_gains`` table. A Bernoulli draw with that band's
    ``percent_with_gains`` decides whether there are any gains; if so, a
    uniform random percentile is pushed through the band's fitted spline and
    scaled by ``uprating_from_2017``.

    Args:
        total_income: Total income to impute gains for.

    Returns:
        The imputed gains. This is 0 for negative incomes, for incomes that
        fall in no band, and when the draw yields no gains.
    """
    if total_income < 0:
        return 0
    distribution_row = capital_gains[
        (capital_gains["minimum_total_income"] <= total_income)
        & (capital_gains["maximum_total_income"] > total_income)
    ]
    if distribution_row.empty:
        # Income is outside every band (e.g. NaN, infinite, or below the
        # first lower bound): nothing to sample from.
        return 0
    band = distribution_row.iloc[0]
    percent_with_gains = band["percent_with_gains"]
    has_gains = np.random.choice(
        [0, 1], p=[1 - percent_with_gains, percent_with_gains]
    )
    if not has_gains:
        return 0
    sample_percentile = np.random.random()
    # Use the spline of the band selected above. The previous index search
    # used `continue`, so it never stopped early and always ended up on the
    # second-to-last band regardless of income.
    spline = splines[band["minimum_total_income"]]
    # NOTE(review): the splines are fitted on the 5th-95th percentiles, so
    # draws outside that range are extrapolated.
    return float(spline(sample_percentile)) * uprating_from_2017
|
||
|
||
imputed_gains = []

st.markdown(
    """Then, for every household in the model, we randomly sample their probability of gains according to the capital gains statistics, and sample a random quantile from the relevant income band's fitted spline to determine the amount if they are imputed to have gains. You can run this process on individual income data inputs below."""
)

# Interactive demo: draw 100 imputations for one chosen income and plot them.
with st.expander("Capital gains imputation test runner"):

    income = st.slider("Total income", 0, 500000, 50000, 1000)

    with st.spinner("Imputing capital gains..."):
        # Keep the draws under their own name. Binding them to
        # `capital_gains` would replace the distribution table that
        # impute_capital_gains() reads as a module-level global.
        sampled_gains = [
            float(impute_capital_gains(income)) for _ in range(100)
        ]

    fig = (
        px.histogram(x=sampled_gains, nbins=10)
        .update_layout(
            title="Imputed capital gains",
            xaxis_title="Capital gains",
            yaxis_title="Frequency",
            xaxis_tickformat=",.0f",
            xaxis_tickprefix="£",
            xaxis_range=[0, 1_000_000],
        )
        .update_traces(marker=dict(color=BLUE))
    )

    st.plotly_chart(format_fig(fig), use_container_width=True)
|
||
# Analysis section: heading, blurb and work-in-progress notice.
ANALYSIS_INTRO = """We can use the imputed capital gains to analyse the distribution of capital gains in the model. The below figure shows the joint distribution of total income and capital gains as a scatter plot."""

st.subheader("Analysis")
st.markdown(ANALYSIS_INTRO)
st.warning("**Again**- in progress.")
|
||
|
||
@st.cache_resource
def get_microsimulation():
    """Build a default Microsimulation and return it.

    ``household_net_income`` is calculated once before returning. The
    function is wrapped in ``st.cache_resource``.
    """
    # Imported inside the function, as in the original page script.
    from policyengine_uk import Microsimulation

    microsimulation = Microsimulation()
    microsimulation.calculate("household_net_income")
    return microsimulation
|
||
|
||
sim = get_microsimulation()

# Headline aggregates, shown side by side in billions of pounds.
col1, col2 = st.columns(2)

with col1:
    st.metric(
        "Total capital gains",
        f"£{sim.calculate('capital_gains').sum()/1e9:.1f}bn",
    )

with col2:
    st.metric(
        "Total CGT revenue",
        f"£{sim.calculate('capital_gains_tax').sum()/1e9:.1f}bn",
    )

# Scatter of the model's total income against capital gains.
with st.expander("PolicyEngine UK capital gains-income joint distribution"):
    fig = (
        px.scatter(
            x=sim.calculate("total_income"),
            y=sim.calculate("capital_gains"),
            opacity=0.1,
        )
        # A scatter draws markers, not lines, so the colour has to be set
        # on `marker`; styling `line` leaves the points in the default
        # colour.
        .update_traces(marker=dict(color=BLUE))
        .update_layout(
            title="PolicyEngine UK capital gains-income joint distribution",
            xaxis_title="Total income",
            yaxis_title="Capital gains",
            xaxis_tickformat=",.0f",
            xaxis_tickprefix="£",
            yaxis_tickformat=",.0f",
            yaxis_tickprefix="£",
        )
    )

    st.plotly_chart(format_fig(fig), use_container_width=True)
72 changes: 72 additions & 0 deletions
72
policyengine_uk/data/datasets/frs/imputations/capital_gains.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import pandas as pd | ||
import numpy as np | ||
|
||
# Fit a spline to each income band's percentiles | ||
from scipy.interpolate import UnivariateSpline | ||
from policyengine_uk import Microsimulation | ||
from tqdm import tqdm | ||
from policyengine_uk.system import system | ||
from policyengine_uk.data.storage import STORAGE_FOLDER | ||
|
||
# Capital gains distribution table: one row per total-income band.
capital_gains = pd.read_csv(
    STORAGE_FOLDER
    / "imputations"
    / "capital_gains_distribution_advani_summers.csv.gz"
)
# Upper bound of each band is the next row's lower bound; the final band
# has no upper limit.
_next_band_floor = capital_gains.minimum_total_income.shift(-1)
capital_gains["maximum_total_income"] = _next_band_floor.fillna(np.inf)

# Quantile levels and the table columns holding the matching gains values.
_quantile_levels = [0.05, 0.1, 0.25, 0.5, 0.75, 0.90, 0.95]
_quantile_columns = ["p05", "p10", "p25", "p50", "p75", "p90", "p95"]

# One degree-1 spline per band, keyed by the band's lower income bound.
splines = {
    band.minimum_total_income: UnivariateSpline(
        _quantile_levels,
        [band[column] for column in _quantile_columns],
        k=1,
    )
    for _, band in capital_gains.iterrows()
}
|
||
|
||
# Default microsimulation, built at import time, and its 2023 total income.
sim = Microsimulation()
total_income = sim.calculate("total_income", 2023)

# Calibration parameter looked up by date below.
cgt_revenue = system.parameters.calibration.programs.capital_gains.total

# Lower income bound of every band, in the order the splines were fitted.
lower_income_bounds = list(splines)

# Scale factor applied to imputed amounts: the ratio of the parameter's
# 2023 value to its 2017 value.
_revenue_2023 = cgt_revenue("2023-01-01")
_revenue_2017 = cgt_revenue("2017-01-01")
uprating_from_2017 = _revenue_2023 / _revenue_2017
|
||
|
||
def impute_capital_gains(total_income: float, age: float) -> float:
    """Randomly draw a capital gains amount for one person.

    The income band containing ``total_income`` is looked up in the
    module-level ``capital_gains`` table. A Bernoulli draw with that band's
    ``percent_with_gains`` decides whether the person has any gains; if so,
    a uniform random percentile is pushed through the band's fitted spline
    and scaled by ``uprating_from_2017``.

    Args:
        total_income: The person's total income.
        age: The person's age; anyone under 18 is given no gains.

    Returns:
        The imputed gains. This is 0 for negative incomes, for under-18s,
        for incomes that fall in no band, and when the draw yields no gains.
    """
    if total_income < 0 or age < 18:
        return 0
    distribution_row = capital_gains[
        (capital_gains["minimum_total_income"] <= total_income)
        & (capital_gains["maximum_total_income"] > total_income)
    ]
    if distribution_row.empty:
        # Income is outside every band (e.g. NaN, infinite, or below the
        # first lower bound): nothing to sample from.
        return 0
    band = distribution_row.iloc[0]
    percent_with_gains = band["percent_with_gains"]
    has_gains = np.random.choice(
        [0, 1], p=[1 - percent_with_gains, percent_with_gains]
    )
    if not has_gains:
        return 0
    sample_percentile = np.random.random()
    # Use the spline of the band selected above. The previous index search
    # used `continue`, so it never stopped early and always ended up on the
    # second-to-last band regardless of income.
    spline = splines[band["minimum_total_income"]]
    # NOTE(review): the splines are fitted on the 5th-95th percentiles, so
    # draws outside that range are extrapolated.
    return float(spline(sample_percentile)) * uprating_from_2017
|
||
|
||
if __name__ == "__main__":
    # Pair every record's 2023 total income with its 2023 age. The pairs
    # are materialised as a list before being handed to tqdm.
    ages = sim.calculate("age", 2023)
    income_age_pairs = list(zip(total_income, ages))

    # Impute in record order so the output rows line up with the inputs.
    imputed_gains = [
        impute_capital_gains(income, age)
        for income, age in tqdm(income_age_pairs)
    ]

    # Write the draws out as a single-column gzipped CSV.
    output_path = STORAGE_FOLDER / "imputations" / "imputed_gains.csv.gz"
    pd.DataFrame({"imputed_gains": imputed_gains}).to_csv(
        output_path, index=False
    )
Binary file added
BIN
+1.64 KB
policyengine_uk/data/storage/imputations/capital_gains_distribution_advani_summers.csv.gz
Binary file not shown.
Binary file not shown.
39 changes: 39 additions & 0 deletions
39
policyengine_uk/parameters/calibration/programs/capital_gains/tax.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
description: Capital gains tax revenue. | ||
values: | ||
1999-01-01: 2_122_000_000 | ||
2000-01-01: 3_236_000_000 | ||
2001-01-01: 3_034_000_000 | ||
2002-01-01: 1_596_000_000 | ||
2003-01-01: 2_225_000_000 | ||
2004-01-01: 2_282_000_000 | ||
2005-01-01: 3_042_000_000 | ||
2006-01-01: 3_830_000_000 | ||
2007-01-01: 5_268_000_000 | ||
2008-01-01: 7_852_000_000 | ||
2009-01-01: 2_491_000_000 | ||
2010-01-01: 3_601_000_000 | ||
2011-01-01: 4_337_000_000 | ||
2012-01-01: 3_927_000_000 | ||
2013-01-01: 3_908_000_000 | ||
2014-01-01: 5_559_000_000 | ||
2015-01-01: 7_060_000_000 | ||
2016-01-01: 8_561_000_000 | ||
2017-01-01: 7_793_000_000 | ||
2018-01-01: 9_191_000_000 | ||
2019-01-01: 9_827_000_000 | ||
2020-01-01: 11_131_000_000 | ||
2021-01-01: 15_267_000_000 | ||
2022-01-01: 18_077_057_790 | ||
# OBR Forecast | ||
2023-01-01: 17_759_351_662 | ||
2024-01-01: 19_512_453_309 | ||
2025-01-01: 21_164_830_357 | ||
2026-01-01: 23_383_475_972 | ||
2027-01-01: 26_144_242_482 | ||
|
||
metadata: | ||
unit: currency-GBP | ||
label: Capital Gains Tax revenue | ||
reference: | ||
- title: Capital Gains Tax | OBR | ||
href: https://obr.uk/forecasts-in-depth/tax-by-tax-spend-by-spend/capital-gains-tax |
Oops, something went wrong.