From 62c11ad75be2d51dd03e2faf964637f9b5c10b1c Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com> Date: Thu, 26 Sep 2024 13:00:12 +0100 Subject: [PATCH] Bug fixes (#282) * Fix Make `random(entity)` deterministic based on entity IDs #280 * Fix Subsampling doesn't preserve `Dataset.time_period` #281 * Fix Remove openfisca_core dependencies #259 * Versioning * Remove Windows test * Remove Windows CI --- .github/workflows/pr.yaml | 5 +- .github/workflows/push.yaml | 5 +- changelog_entry.yaml | 5 + policyengine_core/commons/formulas.py | 48 +- policyengine_core/simulations/simulation.py | 2 +- policyengine_core/variables/defined_for.py | 8 +- setup.py | 14 +- test.ipynb | 491 ++++++++++++++++++++ 8 files changed, 549 insertions(+), 29 deletions(-) create mode 100644 test.ipynb diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4c737d784..64cfaf519 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -31,10 +31,7 @@ jobs: - name: Check version number has been properly updated run: .github/is-version-number-acceptable.sh Test: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, windows-latest] + runs-on: ubuntu-latest steps: - name: Checkout repo uses: actions/checkout@v3 diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index f8add96ec..9cf4a8cf5 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -43,13 +43,10 @@ jobs: author_name: Github Actions[bot] message: Update PolicyEngine Core Test: - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest if: | (github.repository == 'PolicyEngine/policyengine-core') && (github.event.head_commit.message == 'Update PolicyEngine Core') - strategy: - matrix: - os: [ubuntu-latest, windows-latest] steps: - name: Checkout repo uses: actions/checkout@v3 diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..72671cca7 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,5 @@ +- bump: minor + changes: + added: + - Randomness based on entity IDs as seeds. + - OpenFisca-Core imports. diff --git a/policyengine_core/commons/formulas.py b/policyengine_core/commons/formulas.py index 169319cc5..4695cf9c6 100644 --- a/policyengine_core/commons/formulas.py +++ b/policyengine_core/commons/formulas.py @@ -300,15 +300,45 @@ def amount_between( return clip(amount, threshold_1, threshold_2) - threshold_1 -def random(entity, reset=True): - if reset: - np.random.seed(0) - x = np.random.rand(entity.count) - if entity.simulation.has_axes: - # Generate the same random number for each entity. - random_number = x[0] - return np.array([random_number] * entity.count) - return x +def random(population): + """ + Generate random values for each entity in the population. + + Args: + population: The population object containing simulation data. + + Returns: + np.ndarray: Array of random values for each entity. + """ + # Initialize count of random calls if not already present + if not hasattr(population.simulation, "count_random_calls"): + population.simulation.count_random_calls = 0 + population.simulation.count_random_calls += 1 + + # Get known periods or use default calculation period + known_periods = population.simulation.get_holder( + f"{population.entity.key}_id" + ).get_known_periods() + period = ( + known_periods[0] + if known_periods + else population.simulation.default_calculation_period + ) + + # Get entity IDs for the period + entity_ids = population(f"{population.entity.key}_id", period) + + # Generate random values for each entity + values = np.array( + [ + np.random.default_rng( + seed=id * 100 + population.simulation.count_random_calls + ).random() + for id in entity_ids + ] + ) + + return values def is_in(values: ArrayLike, *targets: list) -> ArrayLike: diff --git a/policyengine_core/simulations/simulation.py b/policyengine_core/simulations/simulation.py index a7d253e3c..482c90e42 100644 --- a/policyengine_core/simulations/simulation.py +++ b/policyengine_core/simulations/simulation.py @@ -1547,7 +1547,7 @@ def subsample( ) # Update the dataset and rebuild the simulation - self.dataset = Dataset.from_dataframe(df) + self.dataset = Dataset.from_dataframe(df, self.dataset.time_period) self.build_from_dataset() return self diff --git a/policyengine_core/variables/defined_for.py b/policyengine_core/variables/defined_for.py index e6e194e93..29a2a2e1a 100644 --- a/policyengine_core/variables/defined_for.py +++ b/policyengine_core/variables/defined_for.py @@ -2,10 +2,10 @@ import numpy as np from numpy.typing import ArrayLike -from openfisca_core.entities import Entity -from openfisca_core.populations import GroupPopulation, Population -from openfisca_core.projectors import EntityToPersonProjector, Projector -from openfisca_core.variables import Variable +from policyengine_core.entities import Entity +from policyengine_core.populations import GroupPopulation, Population +from policyengine_core.projectors import EntityToPersonProjector, Projector +from policyengine_core.variables import Variable class CallableSubset: diff --git a/setup.py b/setup.py index 66a8325e8..753c32583 100644 --- a/setup.py +++ b/setup.py @@ -12,25 +12,25 @@ general_requirements = [ "pytest>=8,<9", "numpy~=1.26.4", - "black", - "linecheck<1", - "yaml-changelog<1", - "coverage", "sortedcontainers<3", "numexpr<3", "dpath<3", "psutil<6", "wheel<1", "h5py>=3,<4", - "requests>=2.27.1,<3", + "requests>=2,<3", "pandas>=1", - "plotly>=5.6.0,<6", - "ipython>=7.17.0,<8", + "plotly>=5,<6", + "ipython>=7,<8", "pyvis>=0.3.2", ] dev_requirements = [ + "black", + "linecheck<1", "jupyter-book<1", + "yaml-changelog<1", + "coverage", "furo<2023", "markupsafe==2.0.1", "coverage", diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 000000000..27c0bb532 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from policyengine_uk import Microsimulation\n", + "from policyengine_core.reforms import Reform\n", + "\n", + "reform = Reform.from_dict({\n", + " \"gov.hmrc.vat.standard_rate\": {\n", + " \"2024-01-01.2100-12-31\": 0.22\n", + " }\n", + "}, country_id=\"uk\")\n", + "\n", + "\n", + "baseline = Microsimulation()\n", + "reformed = Microsimulation(reform=reform)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agevatconsumptionfull_rate_vat_expenditure_ratefood_and_non_alcoholic_beverages_consumptionalcohol_and_tobacco_consumptionclothing_and_footwear_consumptionhousing_water_and_electricity_consumptionhousehold_furnishings_consumptionhealth_consumptiontransport_consumptioncommunication_consumptionrecreation_consumptioneducation_consumptionrestaurants_and_hotels_consumptionmiscellaneous_consumptionpetrol_spendingdiesel_spendingdomestic_energy_consumption
31280.0-929.9658812118.353027-0.8416081270.2747800.00.0614.1043090.00.00.000000.0187.9159090.00.046.0578230.00.0283.716187
103980.0-129.693314-1217.7687990.19485185.3604970.00.0-1903.7233890.00.00.000000.0291.6995540.00.0308.8944700.00.0666.303162
104052.0-129.693314-1217.7687990.19485185.3604970.00.0-1903.7233890.00.00.000000.0291.6995540.00.0308.8944700.00.0666.303162
189680.0-593.5120241351.950684-0.841608461.8064270.00.0666.9172970.00.00.000000.0187.9159090.00.035.3109970.00.0354.338196
205735.0-40.791981-1168.0263670.058855692.4025880.00.0-2319.1647950.00.030.091110.0187.9159090.00.0240.7288820.00.0-2926.207031
............................................................
20930750.0-9.732808-125.5843200.139750131.4183200.00.0-424.3460690.00.00.000000.00.0000000.00.0167.3434140.00.00.000000
20930849.0-9.732808-125.5843200.139750131.4183200.00.0-424.3460690.00.00.000000.00.0000000.00.0167.3434140.00.00.000000
20930913.0-9.732808-125.5843200.139750131.4183200.00.0-424.3460690.00.00.000000.00.0000000.00.0167.3434140.00.00.000000
21216968.0-23.210276-378.2883300.1090761337.5191650.00.0-1903.7233890.00.00.000000.0187.9159090.00.00.0000000.00.0638.054382
21217065.0-23.210276-378.2883300.1090761337.5191650.00.0-1903.7233890.00.00.000000.0187.9159090.00.00.0000000.00.0638.054382
\n", + "

464 rows × 19 columns

\n", + "
" + ], + "text/plain": [ + " age vat consumption full_rate_vat_expenditure_rate \\\n", + "312 80.0 -929.965881 2118.353027 -0.841608 \n", + "1039 80.0 -129.693314 -1217.768799 0.194851 \n", + "1040 52.0 -129.693314 -1217.768799 0.194851 \n", + "1896 80.0 -593.512024 1351.950684 -0.841608 \n", + "2057 35.0 -40.791981 -1168.026367 0.058855 \n", + "... ... ... ... ... \n", + "209307 50.0 -9.732808 -125.584320 0.139750 \n", + "209308 49.0 -9.732808 -125.584320 0.139750 \n", + "209309 13.0 -9.732808 -125.584320 0.139750 \n", + "212169 68.0 -23.210276 -378.288330 0.109076 \n", + "212170 65.0 -23.210276 -378.288330 0.109076 \n", + "\n", + " food_and_non_alcoholic_beverages_consumption \\\n", + "312 1270.274780 \n", + "1039 85.360497 \n", + "1040 85.360497 \n", + "1896 461.806427 \n", + "2057 692.402588 \n", + "... ... \n", + "209307 131.418320 \n", + "209308 131.418320 \n", + "209309 131.418320 \n", + "212169 1337.519165 \n", + "212170 1337.519165 \n", + "\n", + " alcohol_and_tobacco_consumption clothing_and_footwear_consumption \\\n", + "312 0.0 0.0 \n", + "1039 0.0 0.0 \n", + "1040 0.0 0.0 \n", + "1896 0.0 0.0 \n", + "2057 0.0 0.0 \n", + "... ... ... \n", + "209307 0.0 0.0 \n", + "209308 0.0 0.0 \n", + "209309 0.0 0.0 \n", + "212169 0.0 0.0 \n", + "212170 0.0 0.0 \n", + "\n", + " housing_water_and_electricity_consumption \\\n", + "312 614.104309 \n", + "1039 -1903.723389 \n", + "1040 -1903.723389 \n", + "1896 666.917297 \n", + "2057 -2319.164795 \n", + "... ... \n", + "209307 -424.346069 \n", + "209308 -424.346069 \n", + "209309 -424.346069 \n", + "212169 -1903.723389 \n", + "212170 -1903.723389 \n", + "\n", + " household_furnishings_consumption health_consumption \\\n", + "312 0.0 0.0 \n", + "1039 0.0 0.0 \n", + "1040 0.0 0.0 \n", + "1896 0.0 0.0 \n", + "2057 0.0 0.0 \n", + "... ... ... \n", + "209307 0.0 0.0 \n", + "209308 0.0 0.0 \n", + "209309 0.0 0.0 \n", + "212169 0.0 0.0 \n", + "212170 0.0 0.0 \n", + "\n", + " transport_consumption communication_consumption \\\n", + "312 0.00000 0.0 \n", + "1039 0.00000 0.0 \n", + "1040 0.00000 0.0 \n", + "1896 0.00000 0.0 \n", + "2057 30.09111 0.0 \n", + "... ... ... \n", + "209307 0.00000 0.0 \n", + "209308 0.00000 0.0 \n", + "209309 0.00000 0.0 \n", + "212169 0.00000 0.0 \n", + "212170 0.00000 0.0 \n", + "\n", + " recreation_consumption education_consumption \\\n", + "312 187.915909 0.0 \n", + "1039 291.699554 0.0 \n", + "1040 291.699554 0.0 \n", + "1896 187.915909 0.0 \n", + "2057 187.915909 0.0 \n", + "... ... ... \n", + "209307 0.000000 0.0 \n", + "209308 0.000000 0.0 \n", + "209309 0.000000 0.0 \n", + "212169 187.915909 0.0 \n", + "212170 187.915909 0.0 \n", + "\n", + " restaurants_and_hotels_consumption miscellaneous_consumption \\\n", + "312 0.0 46.057823 \n", + "1039 0.0 308.894470 \n", + "1040 0.0 308.894470 \n", + "1896 0.0 35.310997 \n", + "2057 0.0 240.728882 \n", + "... ... ... \n", + "209307 0.0 167.343414 \n", + "209308 0.0 167.343414 \n", + "209309 0.0 167.343414 \n", + "212169 0.0 0.000000 \n", + "212170 0.0 0.000000 \n", + "\n", + " petrol_spending diesel_spending domestic_energy_consumption \n", + "312 0.0 0.0 283.716187 \n", + "1039 0.0 0.0 666.303162 \n", + "1040 0.0 0.0 666.303162 \n", + "1896 0.0 0.0 354.338196 \n", + "2057 0.0 0.0 -2926.207031 \n", + "... ... ... ... \n", + "209307 0.0 0.0 0.000000 \n", + "209308 0.0 0.0 0.000000 \n", + "209309 0.0 0.0 0.000000 \n", + "212169 0.0 0.0 638.054382 \n", + "212170 0.0 0.0 638.054382 \n", + "\n", + "[464 rows x 19 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "IMPUTATIONS = [\n", + " \"food_and_non_alcoholic_beverages_consumption\",\n", + " \"alcohol_and_tobacco_consumption\",\n", + " \"clothing_and_footwear_consumption\",\n", + " \"housing_water_and_electricity_consumption\",\n", + " \"household_furnishings_consumption\",\n", + " \"health_consumption\",\n", + " \"transport_consumption\",\n", + " \"communication_consumption\",\n", + " \"recreation_consumption\",\n", + " \"education_consumption\",\n", + " \"restaurants_and_hotels_consumption\",\n", + " \"miscellaneous_consumption\",\n", + " \"petrol_spending\",\n", + " \"diesel_spending\",\n", + " \"domestic_energy_consumption\",\n", + "]\n", + "baseline_income = baseline.calculate(\"household_net_income\", period=2024, map_to=\"person\")\n", + "reformed_income = reformed.calculate(\"household_net_income\", period=2024, map_to=\"person\")\n", + "difference_income = reformed_income - baseline_income\n", + "\n", + "baseline.calculate_dataframe([\"age\", \"vat\", \"consumption\", \"full_rate_vat_expenditure_rate\", *IMPUTATIONS])[difference_income > 0]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}