Skip to content

Commit

Permalink
Merge branch 'PolicyEngine:master' into parameternode-274
Browse files Browse the repository at this point in the history
  • Loading branch information
SylviaDu99 authored Nov 12, 2024
2 parents b4731ec + b2d6ffa commit 01ef1a3
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 14 deletions.
35 changes: 35 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,36 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.12.4] - 2024-11-11 14:20:12

### Fixed

- Datasets now write downloaded data to disk with an atomic write (`atomic_write`). This prevents other processes from reading a partially written file and from clobbering each other's writes.

## [3.12.3] - 2024-11-04 16:29:34

### Fixed

- Bug in labour supply responses.

## [3.12.2] - 2024-11-01 21:39:35

### Added

- Compatibility settings for editable installs

## [3.12.1] - 2024-11-01 11:36:53

### Fixed

- Bug causing Enums to fail in some simulations.

## [3.12.0] - 2024-10-30 18:46:15

### Changed

- update the ipython requirement to version 8

## [3.11.1] - 2024-10-29 20:04:02

### Changed
Expand Down Expand Up @@ -894,6 +924,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0



[3.12.4]: https://github.com/PolicyEngine/policyengine-core/compare/3.12.3...3.12.4
[3.12.3]: https://github.com/PolicyEngine/policyengine-core/compare/3.12.2...3.12.3
[3.12.2]: https://github.com/PolicyEngine/policyengine-core/compare/3.12.1...3.12.2
[3.12.1]: https://github.com/PolicyEngine/policyengine-core/compare/3.12.0...3.12.1
[3.12.0]: https://github.com/PolicyEngine/policyengine-core/compare/3.11.1...3.12.0
[3.11.1]: https://github.com/PolicyEngine/policyengine-core/compare/3.11.0...3.11.1
[3.11.0]: https://github.com/PolicyEngine/policyengine-core/compare/3.10.0...3.11.0
[3.10.0]: https://github.com/PolicyEngine/policyengine-core/compare/3.9.0...3.10.0
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ format:
black . -l 79

install:
pip install -e .[dev]
pip install -e ".[dev]" --config-settings editable_mode=compat
pip install policyengine-us
pip install policyengine-uk

Expand Down
27 changes: 27 additions & 0 deletions changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -721,3 +721,30 @@
changed:
- Replace custom implementation of microdf with deployed version
date: 2024-10-29 20:04:02
- bump: minor
changes:
changed:
- update the ipython requirement to version 8
date: 2024-10-30 18:46:15
- bump: patch
changes:
fixed:
- Bug causing Enums to fail in some simulations.
date: 2024-11-01 11:36:53
- bump: patch
changes:
added:
- Compatibility settings for editable installs
date: 2024-11-01 21:39:35
- bump: patch
changes:
fixed:
- Bug in labour supply responses.
date: 2024-11-04 16:29:34
- bump: patch
changes:
fixed:
- Datasets writing downloaded data now use an atomic_write to write it to disk.
This prevents other processes attempting to read a partial file or clobbering
each other.
date: 2024-11-11 14:20:12
33 changes: 30 additions & 3 deletions policyengine_core/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,34 @@
import shutil
import requests
import os
import tempfile


def atomic_write(file: Path, content: bytes) -> None:
    """Atomically replace ``file`` with ``content``.

    Any existing file is unlinked rather than overwritten. Implemented by:

    1. Writing the content to a uniquely named temporary file created in the
       same directory as the target.
    2. Renaming (not copying) that temporary file onto the target name, so
       that readers see either the old file or the complete new one, never
       a partial file.

    A process that already has the original file open keeps reading the old
    contents: the rename relinks the name instead of truncating the old file.

    If anything fails before the rename completes, the temporary file is
    removed and the exception is re-raised; the target is left untouched.

    Args:
        file: Destination path.
        content: Raw bytes to store at ``file``.
    """
    with tempfile.NamedTemporaryFile(
        mode="wb",
        dir=file.parent.absolute().as_posix(),
        prefix=file.name + ".download.",
        delete=False,
    ) as f:
        temp_name = f.name
        try:
            f.write(content)
            # Close (and thereby flush) before renaming so the complete
            # content is on disk under the temporary name first.
            f.close()
            # os.replace overwrites an existing target on every platform;
            # os.rename raises on Windows when the target already exists.
            os.replace(temp_name, file.absolute().as_posix())
        except BaseException:
            # Setting ``f.delete`` after construction does not remove the
            # file, so clean up the temporary file explicitly.
            try:
                f.close()
            finally:
                try:
                    os.unlink(temp_name)
                except FileNotFoundError:
                    pass
            raise


class Dataset:
Expand Down Expand Up @@ -278,7 +306,7 @@ def store_file(self, file_path: str):
raise FileNotFoundError(f"File {file_path} does not exist.")
shutil.move(file_path, self.file_path)

def download(self, url: str = None):
def download(self, url: str = None) -> None:
"""Downloads a file to the dataset's file path.
Args:
Expand Down Expand Up @@ -333,8 +361,7 @@ def download(self, url: str = None):
f"Invalid response code {response.status_code} for url {url}."
)

with open(self.file_path, "wb") as f:
f.write(response.content)
atomic_write(self.file_path, response.content)

def remove(self):
"""Removes the dataset from disk."""
Expand Down
29 changes: 21 additions & 8 deletions policyengine_core/simulations/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,15 +635,20 @@ def _calculate(
return value

if variable.requires_computation_after is not None:
if variable.requires_computation_after not in [
variables_in_stack = [
node.get("name") for node in self.tracer.stack
]:
]
variable_in_stack = (
variable.requires_computation_after in variables_in_stack
)
required_is_known_periods = self.get_holder(
variable.requires_computation_after
).get_known_periods()
if (not variable_in_stack) and (
not len(required_is_known_periods) > 0
):
raise ValueError(
f"Variable {variable_name} requires {variable.requires_computation_after} to be requested first. The full stack is: "
+ "\n".join(
f" - {node.get('name')} {node.get('period')}, {node.get('branch_name')}"
for node in self.tracer.stack
)
f"Variable {variable_name} requires {variable.requires_computation_after} to be requested first. That variable is known in: {required_is_known_periods}. The full stack is: {variables_in_stack}. {variable_in_stack, len(required_is_known_periods) > 0}"
)
alternate_period_handling = False
if variable.definition_period == MONTH and period.unit == YEAR:
Expand Down Expand Up @@ -739,7 +744,15 @@ def _calculate(
array = holder.default_array()

if variable.defined_for is not None:
array = np.where(mask, array, np.zeros_like(array))
array = np.where(mask, array, variable.default_value)
if variable.value_type == Enum:
array = np.array(
[
item.index if isinstance(item, Enum) else item
for item in array
]
)
array = EnumArray(array, variable.possible_values)

array = self._cast_formula_result(array, variable)
holder.put_in_cache(array, period, self.branch_name)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"requests>=2,<3",
"pandas>=1",
"plotly>=5,<6",
"ipython>=7,<8",
"ipython>=8,<9",
"pyvis>=0.3.2",
"microdf_python>=0.4.3",
]
Expand All @@ -48,7 +48,7 @@

setup(
name="policyengine-core",
version="3.11.1",
version="3.12.4",
author="PolicyEngine",
author_email="[email protected]",
classifiers=[
Expand Down
19 changes: 19 additions & 0 deletions tests/core/data/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
from tempfile import NamedTemporaryFile


def test_dataset_class():
Expand All @@ -24,3 +25,21 @@ def generate(self) -> None:
assert test_dataset.exists
test_dataset.remove()
assert not test_dataset.exists


def test_atomic_write():
    """Check that atomic_write swaps the file without disturbing open readers."""
    from policyengine_core.data.dataset import atomic_write

    with NamedTemporaryFile(mode="w") as scratch:
        scratch.write("Hello, world\n")
        scratch.flush()
        scratch_path = scratch.name
        replacement = "NOPE\n".encode()

        # Take a read handle on the original contents before replacing them.
        with open(scratch_path, "r") as stale_reader:
            atomic_write(Path(scratch_path), replacement)

            # The handle opened earlier still reads the old contents.
            old_line = stale_reader.readline()
            assert old_line == "Hello, world\n"

            # A handle opened after the write reads the new contents.
            with open(scratch_path, "r") as fresh_reader:
                new_line = fresh_reader.readline()
                assert new_line == "NOPE\n"

0 comments on commit 01ef1a3

Please sign in to comment.