diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..cdf2ece25 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - Initial version of capital gains imputations and logic. diff --git a/docs/streamlit/.streamlit/config.toml b/docs/streamlit/.streamlit/config.toml new file mode 100644 index 000000000..9c7195826 --- /dev/null +++ b/docs/streamlit/.streamlit/config.toml @@ -0,0 +1,3 @@ +[theme] +primaryColor="#2C6496" +font="serif" diff --git a/docs/streamlit/Dockerfile b/docs/streamlit/Dockerfile new file mode 100644 index 000000000..5a1b46baa --- /dev/null +++ b/docs/streamlit/Dockerfile @@ -0,0 +1,7 @@ +FROM python:3.9 +RUN pip install policyengine-uk streamlit ipython +WORKDIR /app +COPY . /app +EXPOSE 8501 +HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health +ENTRYPOINT ["streamlit", "run", "Home.py", "--server.port=8501", "--server.address=0.0.0.0"] diff --git a/docs/streamlit/Home.py b/docs/streamlit/Home.py new file mode 100644 index 000000000..5bb4adb71 --- /dev/null +++ b/docs/streamlit/Home.py @@ -0,0 +1,41 @@ +import streamlit as st + +STYLE = """ + +""" +st.write(STYLE, unsafe_allow_html=True) + +st.title("PolicyEngine UK documentation") + +st.markdown( + """ + This is the documentation for PolicyEngine UK, an open-source microsimulation model of the UK tax and benefit system. 
+ """ +) diff --git a/docs/streamlit/pages/Capital_Gains_Tax.py b/docs/streamlit/pages/Capital_Gains_Tax.py new file mode 100644 index 000000000..b4bdb1fba --- /dev/null +++ b/docs/streamlit/pages/Capital_Gains_Tax.py @@ -0,0 +1,197 @@ +import streamlit as st +import pandas as pd +from microdf import MicroDataFrame +import numpy as np +import plotly.express as px +from policyengine_core.charts import format_fig, BLUE, BLUE_LIGHT +from Home import STYLE +from policyengine_uk.data.storage import STORAGE_FOLDER + +# st.set_page_config(layout="wide") + +st.write(STYLE, unsafe_allow_html=True) +st.title("Capital Gains Tax") + +st.markdown( + """This page documents PolicyEngine's in-progress capital gains imputations in the PolicyEngine UK microsimulation model.""" +) + +st.subheader("Method") + +st.markdown( + """Our input data consists of: PolicyEngine's Enhanced FRS (incorporating WAS, LCFS, SPI and ONS/OBR summary data), and joint capital gains-taxable income data from [CAGE working paper no. 465, *Capital Gains and UK Inequality* (Arun Advani, Andy Summers)](https://warwick.ac.uk/fac/soc/economics/research/centres/cage/manage/publications/wp465.2020.pdf). + +This data includes p05, p10, p25, p50, p75, p90, and p95 percentiles of capital gains (given gains != 0) as well as the percentage with gains for each of over 60 income bands. We fit a spline to each income band's percentiles, and use these splines to impute capital gains for each individual in the microsimulation model as an initial approach. + +The below figure is interactive and shows the fitted spline for each income band. + """ +) + +st.warning( + "**Caveat:** so far, we've only used income bands up to over £128,000, so won't capture the very highest earners." 
def impute_capital_gains(total_income: float) -> float:
    """Draw one random capital gains amount for the given total income.

    The income band whose ``[minimum_total_income, maximum_total_income)``
    range contains ``total_income`` supplies both the probability of having
    any gains (``percent_with_gains``) and the fitted percentile spline the
    amount is sampled from.

    Args:
        total_income: Total income used to select the income band.

    Returns:
        ``0`` when the income is negative, when no band contains it, or when
        the random draw says there are no gains; otherwise the band's spline
        evaluated at a uniformly random percentile, multiplied by
        ``uprating_from_2017``.
    """
    if total_income < 0:
        return 0

    distribution_row = capital_gains[
        (capital_gains["minimum_total_income"] <= total_income)
        & (capital_gains["maximum_total_income"] > total_income)
    ]
    if distribution_row.empty:
        # No band covers this income (e.g. NaN), so there is nothing to
        # sample from.
        return 0
    band = distribution_row.iloc[0]

    percent_with_gains = band["percent_with_gains"]
    has_gains = np.random.choice(
        [0, 1], p=[1 - percent_with_gains, percent_with_gains]
    )
    if not has_gains:
        return 0

    sample_percentile = np.random.random()
    # Splines are keyed by each band's lower income bound, so reuse the band
    # matched above: the amount is then sampled from the same band that
    # supplied the probability of having gains.
    spline = splines[band["minimum_total_income"]]
    return spline(sample_percentile) * uprating_from_2017
@st.cache_resource
def get_microsimulation():
    """Build the default PolicyEngine UK microsimulation for this page.

    Decorated with ``st.cache_resource``. ``household_net_income`` is
    calculated once before the simulation is returned; the result of that
    call is discarded.

    Returns:
        The ``Microsimulation`` instance.
    """
    # Imported here rather than at module level, as in the original.
    from policyengine_uk import Microsimulation

    simulation = Microsimulation()
    simulation.calculate("household_net_income")
    return simulation
def impute_capital_gains(total_income: float, age: float) -> float:
    """Draw one random capital gains amount for a person.

    The income band whose ``[minimum_total_income, maximum_total_income)``
    range contains ``total_income`` supplies both the probability of having
    any gains (``percent_with_gains``) and the fitted percentile spline the
    amount is sampled from.

    Args:
        total_income: Total income used to select the income band.
        age: The person's age; anyone under 18 is assigned no gains.

    Returns:
        ``0`` when the income is negative, the age is below 18, no band
        contains the income, or the random draw says there are no gains;
        otherwise the band's spline evaluated at a uniformly random
        percentile, multiplied by ``uprating_from_2017``.
    """
    if total_income < 0 or age < 18:
        return 0

    distribution_row = capital_gains[
        (capital_gains["minimum_total_income"] <= total_income)
        & (capital_gains["maximum_total_income"] > total_income)
    ]
    if distribution_row.empty:
        # No band covers this income (e.g. NaN), so there is nothing to
        # sample from.
        return 0
    band = distribution_row.iloc[0]

    percent_with_gains = band["percent_with_gains"]
    has_gains = np.random.choice(
        [0, 1], p=[1 - percent_with_gains, percent_with_gains]
    )
    if not has_gains:
        return 0

    sample_percentile = np.random.random()
    # Splines are keyed by each band's lower income bound, so reuse the band
    # matched above: the amount is then sampled from the same band that
    # supplied the probability of having gains.
    spline = splines[band["minimum_total_income"]]
    return spline(sample_percentile) * uprating_from_2017
a/policyengine_uk/data/storage/imputations/imputed_gains.csv.gz b/policyengine_uk/data/storage/imputations/imputed_gains.csv.gz new file mode 100644 index 000000000..02691494e Binary files /dev/null and b/policyengine_uk/data/storage/imputations/imputed_gains.csv.gz differ diff --git a/policyengine_uk/parameters/calibration/programs/capital_gains/tax.yaml b/policyengine_uk/parameters/calibration/programs/capital_gains/tax.yaml new file mode 100644 index 000000000..015cad95b --- /dev/null +++ b/policyengine_uk/parameters/calibration/programs/capital_gains/tax.yaml @@ -0,0 +1,39 @@ +description: Capital gains tax revenue. +values: + 1999-01-01: 2_122_000_000 + 2000-01-01: 3_236_000_000 + 2001-01-01: 3_034_000_000 + 2002-01-01: 1_596_000_000 + 2003-01-01: 2_225_000_000 + 2004-01-01: 2_282_000_000 + 2005-01-01: 3_042_000_000 + 2006-01-01: 3_830_000_000 + 2007-01-01: 5_268_000_000 + 2008-01-01: 7_852_000_000 + 2009-01-01: 2_491_000_000 + 2010-01-01: 3_601_000_000 + 2011-01-01: 4_337_000_000 + 2012-01-01: 3_927_000_000 + 2013-01-01: 3_908_000_000 + 2014-01-01: 5_559_000_000 + 2015-01-01: 7_060_000_000 + 2016-01-01: 8_561_000_000 + 2017-01-01: 7_793_000_000 + 2018-01-01: 9_191_000_000 + 2019-01-01: 9_827_000_000 + 2020-01-01: 11_131_000_000 + 2021-01-01: 15_267_000_000 + 2022-01-01: 18_077_057_790 + # OBR Forecast + 2023-01-01: 17_759_351_662 + 2024-01-01: 19_512_453_309 + 2025-01-01: 21_164_830_357 + 2026-01-01: 23_383_475_972 + 2027-01-01: 26_144_242_482 + +metadata: + unit: currency-GBP + label: Capital Gains Tax revenue + reference: + - title: Capital Gains Tax | OBR + href: https://obr.uk/forecasts-in-depth/tax-by-tax-spend-by-spend/capital-gains-tax diff --git a/policyengine_uk/parameters/calibration/programs/capital_gains/total.yaml b/policyengine_uk/parameters/calibration/programs/capital_gains/total.yaml new file mode 100644 index 000000000..21b3fc668 --- /dev/null +++ b/policyengine_uk/parameters/calibration/programs/capital_gains/total.yaml @@ -0,0 +1,45 @@ 
+description: Total capital gains by individuals. +values: + 1987-01-01: 7_994_000_000 + 1988-01-01: 5_366_000_000 + 1989-01-01: 4_832_000_000 + 1990-01-01: 2_912_000_000 + 1991-01-01: 2_634_000_000 + 1992-01-01: 1_885_000_000 + 1993-01-01: 2_740_000_000 + 1994-01-01: 2_212_000_000 + 1995-01-01: 3_131_000_000 + 1996-01-01: 3_918_000_000 + 1997-01-01: 5_444_000_000 + 1998-01-01: 5_958_000_000 + 1999-01-01: 8_761_000_000 + 2000-01-01: 7_732_000_000 + 2001-01-01: 4_556_000_000 + 2002-01-01: 6_588_000_000 + 2003-01-01: 6_869_000_000 + 2004-01-01: 9_170_000_000 + 2005-01-01: 11_562_000_000 + 2006-01-01: 15_404_000_000 + 2007-01-01: 20_874_000_000 + 2008-01-01: 14_005_000_000 + 2009-01-01: 18_423_000_000 + 2010-01-01: 23_295_000_000 + 2011-01-01: 21_695_000_000 + 2012-01-01: 22_106_000_000 + 2013-01-01: 30_488_000_000 + 2014-01-01: 38_096_000_000 + 2015-01-01: 47_254_000_000 + 2016-01-01: 48_176_000_000 + 2017-01-01: 55_570_000_000 + 2018-01-01: 60_662_000_000 + 2019-01-01: 63_488_000_000 + 2020-01-01: 76_582_000_000 + 2021-01-01: 87_275_000_000 + +metadata: + unit: currency-GBP + label: Total capital gains + uprating: calibration.programs.capital_gains.tax + reference: + - title: Capital Gains Tax statistics | GOV.UK | Table 1 + href: https://www.gov.uk/government/statistics/capital-gains-tax-statistics diff --git a/policyengine_uk/parameters/gov/hmrc/cgt/README.md b/policyengine_uk/parameters/gov/hmrc/cgt/README.md new file mode 100644 index 000000000..76c28448f --- /dev/null +++ b/policyengine_uk/parameters/gov/hmrc/cgt/README.md @@ -0,0 +1 @@ +# Capital Gains Tax diff --git a/policyengine_uk/parameters/gov/hmrc/cgt/annual_exempt_amount.yaml b/policyengine_uk/parameters/gov/hmrc/cgt/annual_exempt_amount.yaml new file mode 100644 index 000000000..832ca5da8 --- /dev/null +++ b/policyengine_uk/parameters/gov/hmrc/cgt/annual_exempt_amount.yaml @@ -0,0 +1,18 @@ +description: Annual Exempt Amount for individuals. 
This parameter is under active development and reforms including it should not be cited. +values: + 2018-01-01: 11_700 + 2019-01-01: 12_000 + 2020-01-01: 12_300 + 2023-01-01: + value: 6_000 + reference: + - title: Finance Act 2023 s. 8(2) + href: https://www.legislation.gov.uk/ukpga/2023/1/section/8 +metadata: + unit: currency-GBP + label: Annual Exempt Amount + reference: + - title: Taxation of Chargeable Gains Act 1992 s. 1K(2) # Current law + href: https://www.legislation.gov.uk/ukpga/1992/12/section/1K + - title: GOV.UK | Capital Gains Tax rates and allowances # For 2018-22 + href: https://www.gov.uk/guidance/capital-gains-tax-rates-and-allowances#tax-free-allowances-for-capital-gains-tax diff --git a/policyengine_uk/parameters/gov/hmrc/cgt/basic_rate.yaml b/policyengine_uk/parameters/gov/hmrc/cgt/basic_rate.yaml new file mode 100644 index 000000000..1671eb74f --- /dev/null +++ b/policyengine_uk/parameters/gov/hmrc/cgt/basic_rate.yaml @@ -0,0 +1,6 @@ +description: Capital gains tax rate on basic rate taxpayers. This parameter is under active development and reforms including it should not be cited. +values: + 2015-01-01: 0.1 +metadata: + unit: /1 + label: Capital Gains Tax basic rate diff --git a/policyengine_uk/parameters/gov/hmrc/cgt/higher_rate.yaml b/policyengine_uk/parameters/gov/hmrc/cgt/higher_rate.yaml new file mode 100644 index 000000000..1623bf4c9 --- /dev/null +++ b/policyengine_uk/parameters/gov/hmrc/cgt/higher_rate.yaml @@ -0,0 +1,6 @@ +description: Capital gains tax rate on higher and additional rate taxpayers. This parameter is under active development and reforms including it should not be cited. 
class capital_gains_tax(Variable):
    # Person-level, yearly capital gains tax liability in GBP.
    # NOTE(review): the formula deducts no annual exempt amount, so positive
    # gains are taxed from the first pound. Confirm this is intended.
    label = "capital gains tax"
    entity = Person
    definition_period = YEAR
    value_type = float
    unit = GBP

    def formula(person, period, parameters):
        """Tax positive gains at the basic rate up to the unused part of the
        basic rate band, and at the higher rate above it.

        The unused band is ``it.rates.uk.thresholds[1]`` minus the person's
        ``adjusted_net_income``, floored at zero. Negative ``capital_gains``
        values are treated as zero.
        """
        hmrc_parameters = parameters(period).gov.hmrc
        cgt_rates = hmrc_parameters.cgt
        income_tax = hmrc_parameters.income_tax

        adjusted_net_income = person("adjusted_net_income", period)
        positive_gains = max_(0, person("capital_gains", period))

        # Headroom left in the basic rate band after adjusted net income.
        band_headroom = max_(
            income_tax.rates.uk.thresholds[1] - adjusted_net_income, 0
        )
        gains_in_basic_band = min_(positive_gains, band_headroom)
        gains_above_basic_band = max_(positive_gains - band_headroom, 0)

        return (
            gains_in_basic_band * cgt_rates.basic_rate
            + gains_above_basic_band * cgt_rates.higher_rate
        )