diff --git a/examples/build.yml b/examples/build.yml index 8fa92c8a..d197a1fa 100644 --- a/examples/build.yml +++ b/examples/build.yml @@ -53,20 +53,20 @@ setup_irrigation_sources: setup_create_farms_simple: +setup_household_characteristics: + setup_crops_from_source: source: MIRCA2000 + +determine_crop_area_fractions: + +setup_farmer_crop_calendar: + year: 2000 + reduce_crops: true + replace_base: false setup_farmer_characteristics_simple: - risk_aversion_mean: 2 - risk_aversion_standard_deviation: 1 - discount_rate: 0.05 interest_rate: 0.05 - irrigation_choice: - canal: 80 - well: 20 - -setup_cultivation_costs: - cultivation_costs: 0 setup_crop_prices: crop_prices: FAO_stat diff --git a/geb/HRUs.py b/geb/HRUs.py index 99b6453b..278de54d 100644 --- a/geb/HRUs.py +++ b/geb/HRUs.py @@ -13,6 +13,7 @@ import numpy as np import zarr.convenience from geb.workflows import AsyncForcingReader +from scipy.spatial import cKDTree try: import cupy as cp @@ -20,6 +21,25 @@ pass +def determine_nearest_river_cell(river_grid, HRU_to_grid): + threshold = 15 + valid_mask = river_grid != -9999 + + valid_indices = np.argwhere(valid_mask) + valid_values = river_grid[valid_mask] + + above_threshold_mask = valid_values > threshold + above_threshold_indices = valid_indices[above_threshold_mask] + above_threshold_indices_in_valid = np.flatnonzero(above_threshold_mask) + + tree = cKDTree(above_threshold_indices) + distances, indices_in_above = tree.query(valid_indices) + + nearest_indices_in_valid = above_threshold_indices_in_valid[indices_in_above] + + return nearest_indices_in_valid[HRU_to_grid] + + def load_grid(filepath, layer=1, return_transform_and_crs=False): if filepath.suffix == ".tif": warnings.warn("tif files are now deprecated. Consider rebuilding the model.") @@ -568,6 +588,11 @@ def __init__(self, data, model) -> None: self.data.get_save_state_path(), "HRU.unmerged_HRU_indices.npz" ) )["data"] + self.nearest_river_grid_cell = np.load( + os.path.join( + self.data.get_save_state_path(), "HRU.nearest_river_grid_cell.npz" + ) + )["data"] else: ( self.land_use_type, @@ -577,12 +602,22 @@ def __init__(self, data, model) -> None: self.grid_to_HRU, self.unmerged_HRU_indices, ) = self.create_HRUs() + + river_grid = load_grid( + self.model.files["grid"]["routing/kinematic/upstream_area"] + ) + + self.nearest_river_grid_cell = determine_nearest_river_cell( + river_grid, self.HRU_to_grid + ) + self.register_initial_data("HRU.land_use_type") self.register_initial_data("HRU.land_use_ratio") self.register_initial_data("HRU.land_owners") self.register_initial_data("HRU.HRU_to_grid") self.register_initial_data("HRU.grid_to_HRU") self.register_initial_data("HRU.unmerged_HRU_indices") + self.register_initial_data("HRU.nearest_river_grid_cell") if self.model.use_gpu: self.land_use_type = cp.array(self.land_use_type) BaseVariables.__init__(self) diff --git a/geb/agents/crop_farmers.py b/geb/agents/crop_farmers.py index 09242ddb..920b66f8 100644 --- a/geb/agents/crop_farmers.py +++ b/geb/agents/crop_farmers.py @@ -4,7 +4,7 @@ import json import copy import calendar -from typing import Any, Dict, Tuple, Union +from typing import Tuple, Union from scipy.stats import genextreme from scipy.optimize import curve_fit @@ -355,10 +355,12 @@ def abstract_water( field_indices_by_farmer: np.ndarray, field_indices: np.ndarray, irrigation_efficiency: np.ndarray, + fraction_irrigated_field: np.ndarray, surface_irrigated: np.ndarray, well_irrigated: np.ndarray, cell_area: np.ndarray, HRU_to_grid: np.ndarray, + nearest_river_grid_cell: np.ndarray, crop_map: np.ndarray, field_is_paddy_irrigated: np.ndarray, paddy_level: np.ndarray, @@ -437,16 +439,19 @@ def abstract_water( ).all() # currently only support situation where all fields are growing crops, or all fields are fallow continue + farmer_has_access_to_irrigation_water = False + # Determine whether farmer would have access to irrigation water this timestep. Regardless of whether the water is actually used. This is used for making investment decisions. for field in farmer_fields: grid_cell = HRU_to_grid[field] + grid_cell_nearest = nearest_river_grid_cell[field] if well_irrigated[farmer]: if groundwater_depth[grid_cell] < well_depth[farmer]: farmer_has_access_to_irrigation_water = True break elif surface_irrigated[farmer]: - if available_channel_storage_m3[grid_cell] > 0: + if available_channel_storage_m3[grid_cell_nearest] > 0: farmer_has_access_to_irrigation_water = True break command_area = farmer_command_area[farmer] @@ -462,6 +467,7 @@ def abstract_water( # If farmer doesn't have access to irrigation water, skip the irrigation abstraction if farmer_has_access_to_irrigation_water: irrigation_efficiency_farmer = irrigation_efficiency[farmer] + fraction_irrigated_field_farmer = fraction_irrigated_field[farmer] # Calculate the potential irrigation consumption for the farmer if field_is_paddy_irrigated[farmer_fields][0]: @@ -491,9 +497,12 @@ def abstract_water( 0.0, ) # limit the irrigation consumption to the potential infiltration capacity - potential_irrigation_consumption_m = np.minimum( - potential_irrigation_consumption_m, - potential_infiltration_capacity[farmer_fields], + potential_irrigation_consumption_m = ( + np.minimum( + potential_irrigation_consumption_m, + potential_infiltration_capacity[farmer_fields], + ) + * fraction_irrigated_field_farmer ) assert (potential_irrigation_consumption_m >= 0).all() @@ -505,7 +514,7 @@ def abstract_water( if potential_irrigation_consumption_farmer_m3 > 0.0: # if irrigation limit is active, reduce the irrigation demand if not np.isnan(remaining_irrigation_limit_m3[farmer]): - potential_irrigation_consumption_farmer_m3 = adjust_irrigation_to_limit( + potential_irrigation_consumption_m = adjust_irrigation_to_limit( farmer=farmer, day_index=day_index, remaining_irrigation_limit_m3=remaining_irrigation_limit_m3, @@ -516,6 +525,9 @@ def abstract_water( potential_irrigation_consumption_m=potential_irrigation_consumption_m, potential_irrigation_consumption_farmer_m3=potential_irrigation_consumption_farmer_m3, ) + potential_irrigation_consumption_farmer_m3 = ( + potential_irrigation_consumption_m * cell_area[farmer_fields] + ).sum() # loop through all farmers fields and apply irrigation for field_idx, field in enumerate(farmer_fields): @@ -529,7 +541,7 @@ def abstract_water( if surface_irrigated[farmer]: irrigation_water_demand_field = withdraw_channel( available_channel_storage_m3=available_channel_storage_m3, - grid_cell=grid_cell, + grid_cell=grid_cell_nearest, cell_area=cell_area, field=field, farmer=farmer, @@ -728,6 +740,170 @@ def plant( return plant, farmers_selling_land +@njit(cache=True) +def arrays_equal(a, b): + for i in range(a.size): + if a.flat[i] != b.flat[i]: + return True if a.flat[i] != b.flat[i] else False + return True + + +@njit(cache=True) +def find_matching_rows(arr, target_row): + n_rows = arr.shape[0] + matches = np.empty(n_rows, dtype=np.bool_) + for i in range(n_rows): + matches[i] = True + for j in range(arr.shape[1]): + if arr[i, j] != target_row[j]: + matches[i] = False + break + return matches + + +@njit(cache=True) +def find_most_similar_index(target_series, yield_ratios, groups): + n = groups.size + indices = [] + for i in range(n): + if groups[i]: + indices.append(i) + n_indices = len(indices) + distances = np.empty(n_indices, dtype=yield_ratios.dtype) + for idx in range(n_indices): + i = indices[idx] + diff = yield_ratios[i] - target_series + distances[idx] = np.linalg.norm(diff) + min_idx = 0 + min_dist = distances[0] + for idx in range(1, n_indices): + if distances[idx] < min_dist: + min_dist = distances[idx] + min_idx = idx + return indices[min_idx] + + +@njit(cache=True) +def crop_yield_ratio_difference_test_njit( + yield_ratios, + crop_elevation_group, + unique_crop_groups, + group_indices, + crop_calendar, + unique_crop_calendars, + p_droughts, +): + n_groups = len(unique_crop_groups) + n_calendars = len(unique_crop_calendars) + n_droughts = len(p_droughts) + + unique_yield_ratio_gain = np.full( + (n_groups, n_calendars, n_droughts), + 0.0, + dtype=np.float32, + ) + + id_to_switch_to = np.full( + (n_groups, n_calendars), + -1, + dtype=np.int32, + ) + + crop_to_switch_to = np.full( + (n_groups, n_calendars), + -1, + dtype=np.int32, + ) + + for group_id in range(n_groups): + unique_group = unique_crop_groups[group_id] + unique_farmer_groups = find_matching_rows(crop_elevation_group, unique_group) + + # Identify the adapted counterparts of the current group + mask = np.empty(n_calendars, dtype=np.bool_) + for i in range(n_calendars): + match = True + for j in range(unique_crop_calendars.shape[1]): + if unique_crop_calendars[i, j] != unique_group[j]: + match = False + break + mask[i] = not match + + # Collect candidate crop rotations + candidate_crop_rotations = [] + for i in range(n_calendars): + if mask[i]: + candidate_crop_rotations.append(unique_crop_calendars[i]) + + num_candidates = len(candidate_crop_rotations) + + # Loop over the counterparts + for crop_id in range(num_candidates): + unique_rotation = candidate_crop_rotations[crop_id] + farmer_class = unique_group[-1] + basin_location = unique_group[-2] + unique_group_other_crop = np.empty( + len(unique_rotation) + 2, dtype=unique_rotation.dtype + ) + unique_group_other_crop[: len(unique_rotation)] = unique_rotation + unique_group_other_crop[len(unique_rotation)] = basin_location + unique_group_other_crop[len(unique_rotation) + 1] = farmer_class + + unique_farmer_groups_other_crop = find_matching_rows( + crop_elevation_group, unique_group_other_crop + ) + + if np.any(unique_farmer_groups_other_crop): + # Compute current yield ratio mean + current_sum = np.zeros(n_droughts, dtype=yield_ratios.dtype) + count_current = 0 + for i in range(unique_farmer_groups.size): + if unique_farmer_groups[i]: + current_sum += yield_ratios[i] + count_current += 1 + current_yield_ratio = ( + current_sum / count_current if count_current > 0 else current_sum + ) + + # Compute candidate yield ratio mean + candidate_sum = np.zeros(n_droughts, dtype=yield_ratios.dtype) + count_candidate = 0 + for i in range(unique_farmer_groups_other_crop.size): + if unique_farmer_groups_other_crop[i]: + candidate_sum += yield_ratios[i] + count_candidate += 1 + candidate_yield_ratio = ( + candidate_sum / count_candidate + if count_candidate > 0 + else candidate_sum + ) + + yield_ratio_gain = candidate_yield_ratio - current_yield_ratio + + crop_to_switch_to[group_id, crop_id] = unique_rotation[0] + + if np.all(np.isnan(yield_ratio_gain)): + yield_ratio_gain = np.zeros_like(yield_ratio_gain) + else: + id_to_switch_to[group_id, crop_id] = find_most_similar_index( + yield_ratio_gain, + yield_ratios, + unique_farmer_groups_other_crop, + ) + + unique_yield_ratio_gain[group_id, crop_id, :] = yield_ratio_gain + + gains_adaptation = unique_yield_ratio_gain[group_indices, :, :] + new_crop_nr = crop_to_switch_to[group_indices, :] + new_farmer_id = id_to_switch_to[group_indices, :] + + return ( + gains_adaptation, + new_crop_nr, + new_farmer_id, + ) + + class CropFarmers(AgentBaseClass): """The agent class for the farmers. Contains all data and behaviourial methods. The __init__ function only gets the model as arguments, the agent parent class and the redundancy. All other variables are loaded at later stages. @@ -791,7 +967,7 @@ def __init__(self, model, agents, reduncancy: float) -> None: self.max_initial_sat_thickness = self.model.config["agent_settings"]["farmers"][ "expected_utility" ]["adaptation_well"]["max_initial_sat_thickness"] - self.lifespan = self.model.config["agent_settings"]["farmers"][ + self.lifespan_well = self.model.config["agent_settings"]["farmers"][ "expected_utility" ]["adaptation_well"]["lifespan"] self.pump_efficiency = self.model.config["agent_settings"]["farmers"][ @@ -812,6 +988,11 @@ def __init__(self, model, agents, reduncancy: float) -> None: self.water_costs_m3_reservoir = 0.20 self.water_costs_m3_groundwater = 0.20 + # Irr efficiency variables + self.lifespan_irrigation = self.model.config["agent_settings"]["farmers"][ + "expected_utility" + ]["adaptation_well"]["lifespan"] + self.elevation_subgrid = load_grid( self.model.files["MERIT_grid"]["landsurface/topo/subgrid_elevation"], return_transform_and_crs=True, @@ -852,10 +1033,6 @@ def __init__(self, model, agents, reduncancy: float) -> None: 30, ) - self.yield_ratio_multiplier_value = self.model.config["agent_settings"][ - "farmers" - ]["expected_utility"]["adaptation_sprinkler"]["yield_multiplier"] - self.var.actual_evapotranspiration_crop_life = self.var.load_initial( "actual_evapotranspiration_crop_life", default=lambda: self.var.full_compressed(0, dtype=np.float32, gpu=False), @@ -929,13 +1106,6 @@ def initiate(self) -> None: self.model.files["binary"]["agents/farmers/risk_aversion"] )["data"] - self.interest_rate = AgentArray( - n=self.n, max_n=self.max_n, dtype=np.float32, fill_value=np.nan - ) - self.interest_rate[:] = np.load( - self.model.files["binary"]["agents/farmers/interest_rate"] - )["data"] - self.discount_rate = AgentArray( n=self.n, max_n=self.max_n, dtype=np.float32, fill_value=np.nan ) @@ -946,10 +1116,10 @@ def initiate(self) -> None: self.intention_factor = AgentArray( n=self.n, max_n=self.max_n, dtype=np.float32, fill_value=np.nan ) - np.random.seed(42) - self.intention_factor[:] = np.random.uniform( - 0, 0.6, size=self.intention_factor.shape - ) + + self.intention_factor[:] = np.load( + self.model.files["binary"]["agents/farmers/intention_factor"] + )["data"] # Load the region_code of each farmer. self.region_id = AgentArray( @@ -969,40 +1139,26 @@ def initiate(self) -> None: max_n=self.max_n, ) - # Initiate adaptation status. 0 = not adapted, 1 adapted. Column 0 = no cost adaptation, 1 = well, 2 = sprinkler + # Initiate adaptation status. + # 0 = not adapted, 1 adapted. Column 0 = no cost adaptation, 1 = well, 2 = irr efficiency, 3 = irr. field expansion self.adapted = AgentArray( n=self.n, max_n=self.max_n, - extra_dims=(3,), + extra_dims=(4,), extra_dims_names=("adaptation_type",), dtype=np.int32, fill_value=0, ) - # the time each agent has been paying off their dry flood proofing investment loan. Column 0 = no cost adaptation, 1 = well, 2 = sprinkler. -1 if they do not have adaptations + # the time each agent has been paying off their loan + # 0 = no cost adaptation, 1 = well, 2 = irr efficiency, 3 = irr. field expansion -1 if they do not have adaptations self.time_adapted = AgentArray( n=self.n, max_n=self.max_n, - extra_dims=(3,), + extra_dims=(4,), extra_dims_names=("adaptation_type",), dtype=np.int32, fill_value=-1, ) - # Set SEUT of all agents to 0 - self.SEUT_no_adapt = AgentArray( - n=self.n, max_n=self.max_n, dtype=np.float32, fill_value=0 - ) - # Set EUT of all agents to 0 - self.EUT_no_adapt = AgentArray( - n=self.n, max_n=self.max_n, dtype=np.float32, fill_value=0 - ) - self.adaptation_mechanism = AgentArray( - n=self.n, - max_n=self.max_n, - extra_dims=(3,), - extra_dims_names=("adaptation_type",), - dtype=np.int32, - fill_value=0, - ) self.crop_calendar = AgentArray( n=self.n, @@ -1070,7 +1226,7 @@ def initiate(self) -> None: rng_wells = np.random.default_rng(17) self.time_adapted[self.adapted[:, 1] == 1, 1] = rng_wells.uniform( 1, - self.lifespan, + self.lifespan_well, np.sum(self.adapted[:, 1] == 1), ) @@ -1140,13 +1296,6 @@ def initiate(self) -> None: fill_value=0, ) - self.yield_ratio_multiplier = AgentArray( - n=self.n, - max_n=self.max_n, - dtype=np.float32, - fill_value=1, - ) - self.actual_yield_per_farmer = AgentArray( n=self.n, max_n=self.max_n, @@ -1243,23 +1392,30 @@ def initiate(self) -> None: fill_value=0, ) - # Create a random set of irrigating farmers --> chance that it does not line up with farmers that are expected to have this - # Create a random generator object with a seed - rng = np.random.default_rng(42) - + # Set irrigation efficiency data + irrigation_mask = self.irrigation_source != -1 self.irrigation_efficiency = AgentArray( - n=self.n, max_n=self.max_n, dtype=np.float32, fill_value=np.nan + n=self.n, max_n=self.max_n, dtype=np.float32, fill_value=0.50 + ) + rng = np.random.default_rng(42) + self.irrigation_efficiency[irrigation_mask] = rng.choice( + [0.50, 0.70, 0.90], size=irrigation_mask.sum(), p=[0.8, 0.15, 0.05] ) - self.irrigation_efficiency[:] = rng.random(self.n) - # Set the people who already have more van 90% irrigation efficiency to already adapted for the drip irrgation adaptation self.adapted[:, 2][self.irrigation_efficiency >= 0.90] = 1 - self.adaptation_mechanism[self.adapted[:, 2] == 1, 2] = 1 - # set the yield_ratio_multiplier to x of people who have drip irrigation, set to 1 for all others - self.yield_ratio_multiplier[:] = np.where( - (self.irrigation_efficiency >= 0.90) & (self.irrigation_source_key != 0), - self.yield_ratio_multiplier_value, + rng_drip = np.random.default_rng(70) + self.time_adapted[self.adapted[:, 2] == 1, 2] = rng_drip.uniform( 1, + self.lifespan_irrigation, + np.sum(self.adapted[:, 2] == 1), + ) + + # Set irrigation expansion data + self.fraction_irrigated_field = AgentArray( + n=self.n, max_n=self.max_n, dtype=np.float32, fill_value=np.nan ) + self.fraction_irrigated_field[:] = 1 + self.adapted[:, 3][self.fraction_irrigated_field >= 1] = 1 + self.base_management_yield_ratio = AgentArray( n=self.n, max_n=self.max_n, @@ -1269,18 +1425,11 @@ def initiate(self) -> None: ], ) - rng_drip = np.random.default_rng(70) - self.time_adapted[self.adapted[:, 2] == 1, 2] = rng_drip.uniform( - 1, - self.lifespan, - np.sum(self.adapted[:, 2] == 1), - ) - # Initiate array that tracks the overall yearly costs for all adaptations - # 0 is input, 1 is microcredit, 2 is adaptation 1 (well), 3 is adaptation 2 (drip irrigation), 4 is water costs, last is total + # 0 is input, 1 is microcredit, 2 is adaptation 1 (well), 3 is adaptation 2 (drip irrigation), 4 irr. field expansion, 5 is water costs, last is total # Columns are the individual loans, i.e. if there are 2 loans for 2 wells, the first and second slot is used - self.n_loans = 5 + self.n_loans = self.adapted[0, :].size + 2 self.all_loans_annual_cost = AgentArray( n=self.n, @@ -1634,6 +1783,7 @@ def abstract_water( self.field_indices_by_farmer.data, self.field_indices, self.irrigation_efficiency.data, + self.fraction_irrigated_field.data, surface_irrigated=np.isin( self.irrigation_source, np.array( @@ -1652,6 +1802,7 @@ def abstract_water( ), cell_area=cell_area, HRU_to_grid=self.model.data.HRU.HRU_to_grid, + nearest_river_grid_cell=self.model.data.HRU.nearest_river_grid_cell, crop_map=self.var.crop_map, field_is_paddy_irrigated=self.field_is_paddy_irrigated, paddy_level=paddy_level, @@ -1721,7 +1872,7 @@ def abstract_water( balance_check( name="water withdrawal_2", how="sum", - influxes=( + outfluxes=( self.channel_abstraction_m3_by_farmer[ ~np.isnan(self.remaining_irrigation_limit_m3) ], @@ -2163,7 +2314,12 @@ def drought_risk_perception( + self.risk_perc_min ) - # print("Risk perception mean = ", np.mean(self.risk_perception)) + print( + "Risk perception mean = ", + np.mean(self.risk_perception), + "STD", + np.std(self.risk_perception), + ) # Determine which farmers need emergency microcredit to keep farming loaning_farmers = drought_loss_current >= self.moving_average_threshold @@ -2211,11 +2367,16 @@ def microcredit( "loan_duration" ] + # interest_rate = self.get_value_per_farmer_from_region_id( + # self.lending_rate, self.model.current_time + # ) + interest_rate = np.full(self.n, 0.05, dtype=np.float32) + # Compute the annual cost of the loan using the interest rate and loan duration annual_cost_microcredit = total_loan * ( - self.interest_rate[loaning_farmers] - * (1 + self.interest_rate[loaning_farmers]) ** loan_duration - / ((1 + self.interest_rate[loaning_farmers]) ** loan_duration - 1) + interest_rate[loaning_farmers] + * (1 + interest_rate[loaning_farmers]) ** loan_duration + / ((1 + interest_rate[loaning_farmers]) ** loan_duration - 1) ) # Add the amounts to the individual loan slots @@ -2266,6 +2427,11 @@ def plant(self) -> None: assert cultivation_cost.shape[0] == len(self.model.regions) assert cultivation_cost.shape[1] == len(self.crop_ids) + # interest_rate = self.get_value_per_farmer_from_region_id( + # self.lending_rate, self.model.current_time + # ) + interest_rate = np.full(self.n, 0.05, dtype=np.float32) + plant_map, farmers_selling_land = plant( n=self.n, day_index=self.model.current_time.timetuple().tm_yday - 1, # 0-indexed @@ -2280,7 +2446,7 @@ def plant(self) -> None: field_size_per_farmer=self.field_size_per_farmer.data, all_loans_annual_cost=self.all_loans_annual_cost.data, loan_tracker=self.loan_tracker.data, - interest_rate=self.interest_rate.data, + interest_rate=interest_rate, farmers_going_out_of_business=False, ) if farmers_selling_land.size > 0: @@ -2350,7 +2516,7 @@ def save_harvest_spei(self, harvesting_farmers) -> None: """ current_SPEI_per_farmer = sample_from_map( array=self.model.data.grid.spei_uncompressed, - coords=self.locations[harvesting_farmers].data, + coords=self.locations[harvesting_farmers], gt=self.model.data.grid.gt, ) @@ -2661,13 +2827,13 @@ def calculate_yield_spei_relation_test_group(self): import os import matplotlib - matplotlib.use("Agg") # Use the 'Agg' backend for non-interactive plotting + matplotlib.use("Agg") import matplotlib.pyplot as plt from scipy.optimize import curve_fit import numpy as np # Create unique groups based on agent properties - crop_elevation_group = self.create_unique_groups() + crop_elevation_group = self.create_unique_groups(10) unique_crop_combinations, group_indices = np.unique( crop_elevation_group, axis=0, return_inverse=True ) @@ -2986,22 +3152,20 @@ def calculate_yield_spei_relation(self): def calculate_yield_spei_relation_group(self): # Create unique groups - crop_elevation_group = self.create_unique_groups() + crop_elevation_group = self.create_unique_groups(1) unique_crop_combinations, group_indices = np.unique( crop_elevation_group, axis=0, return_inverse=True ) - # Mask out empty columns - mask_columns = np.any(self.yearly_yield_ratio != 0, axis=0) & np.any( - self.yearly_SPEI_probability != 0, axis=0 + # Mask out empty rows (agents) where data is zero or NaN + mask_agents = np.any(self.yearly_yield_ratio != 0, axis=1) & np.any( + self.yearly_SPEI_probability != 0, axis=1 ) - masked_yearly_yield_ratio = self.yearly_yield_ratio[:, mask_columns] - masked_SPEI_probability = self.yearly_SPEI_probability[:, mask_columns] - masked_SPEI_probability_log = np.log10(masked_SPEI_probability) - - # Number of groups - n_groups = unique_crop_combinations.shape[0] + # Apply the mask to data + masked_yearly_yield_ratio = self.yearly_yield_ratio[mask_agents, :] + masked_SPEI_probability = self.yearly_SPEI_probability[mask_agents, :] + group_indices = group_indices[mask_agents] # Number of groups n_groups = unique_crop_combinations.shape[0] @@ -3012,38 +3176,44 @@ def calculate_yield_spei_relation_group(self): r_squared_array = np.zeros(n_groups) for group_idx in range(n_groups): - col_indices = np.where(group_indices == group_idx)[0] + agent_indices = np.where(group_indices == group_idx)[0] # Get data for the group - y_data = masked_yearly_yield_ratio[col_indices, :] - X_data_log = masked_SPEI_probability_log[col_indices, :] - X_data_prob = masked_SPEI_probability[col_indices, :] + y_data = masked_yearly_yield_ratio[agent_indices, :] + X_data = masked_SPEI_probability[agent_indices, :] - # Remove values where the probability is > 1 - mask = X_data_prob >= 1 + # Remove invalid values where SPEI probability >= 1 or yield <= 0 + mask = (X_data >= 1) | (y_data <= 0) y_data[mask] = np.nan - X_data_log[mask] = np.nan + X_data[mask] = np.nan - # Compute mean over columns (axis=1) - y_group = np.nanmean(y_data, axis=1) - X_group = np.nanmean(X_data_log, axis=1) + # Compute mean over agents (axis=0 corresponds to years) + y_group = np.nanmean(y_data, axis=0) # shape (num_years,) + X_group = np.nanmean(X_data, axis=0) # shape (num_years,) - # Remove any years with NaN values - valid_indices = ~np.isnan(y_group) & ~np.isnan(X_group) - y_group = y_group[valid_indices] - X_group = X_group[valid_indices] + # Remove any entries with NaN values + valid_indices = (~np.isnan(y_group)) & (~np.isnan(X_group)) & (y_group > 0) + y_group_valid = y_group[valid_indices] + X_group_valid = X_group[valid_indices] - if len(X_group) >= 2: - # Prepare matrices - X_matrix = np.vstack([X_group, np.ones(len(X_group))]).T - # Perform linear regression - coefficients = np.linalg.lstsq(X_matrix, y_group, rcond=None)[0] - a, b = coefficients + if len(X_group_valid) >= 2: + # Take the natural logarithm of y_group_valid + ln_y_group = np.log(y_group_valid) + + # Prepare matrices for linear regression + X_matrix = np.vstack([X_group_valid, np.ones(len(X_group_valid))]).T + + # Perform linear regression on ln(y) = b * X + ln(a) + coefficients = np.linalg.lstsq(X_matrix, ln_y_group, rcond=None)[0] + b, ln_a = coefficients + a = np.exp(ln_a) + + # Calculate predicted y values + y_pred = a * np.exp(b * X_group_valid) # Calculate R² - y_pred = a * X_group + b - ss_res = np.sum((y_group - y_pred) ** 2) - ss_tot = np.sum((y_group - np.mean(y_group)) ** 2) + ss_res = np.sum((y_group_valid - y_pred) ** 2) + ss_tot = np.sum((y_group_valid - np.mean(y_group_valid)) ** 2) r_squared = 1 - ss_res / ss_tot if ss_tot != 0 else np.nan else: # Not enough data points @@ -3054,49 +3224,47 @@ def calculate_yield_spei_relation_group(self): r_squared_array[group_idx] = r_squared # Assign relations to agents - self.farmer_yield_probability_relation = np.column_stack( + farmer_yield_probability_relation = np.column_stack( (a_array[group_indices], b_array[group_indices]) ) + self.farmer_yield_probability_relation = farmer_yield_probability_relation # Print median R² - valid_r2 = r_squared_array[~np.isnan(r_squared_array)] - print("Median R²:", np.median(valid_r2) if len(valid_r2) > 0 else "N/A") + valid_r2 = r_squared_array[~np.isnan(r_squared_array)][group_indices] + print( + "Median R² for exponential model:", + np.median(valid_r2) if len(valid_r2) > 0 else "N/A", + ) def adapt_crops(self) -> None: # Fetch loan configuration loan_duration = 2 - adaptation_type = 2 - # For now there are no differences in cost, so set the annual cost per farmer for crops as annual costs - # Update to also be able to include the annual cost of the possible new crop - # annual_cost = np.sum(self.all_loans_annual_cost[:, :, 0], axis=1) - annual_cost = np.zeros( - self.n - ) # As both varieties cost the same, for now its 0 as the cost is already in the prior nnual costs - crops_with_varieties = [0, 26, 27, 1, 28, 29, 2, 30, 31, 7, 32, 33, 9, 34, 35] + index = self.cultivation_costs[0].get(self.model.current_time) + cultivation_cost = self.cultivation_costs[1][index] + cultivation_cost_current_crop = cultivation_cost[ + self.region_id, self.crop_calendar[:, 0, 0] + ] + annual_cost_empty = np.zeros(self.n, dtype=np.float32) - extra_constraint = np.any( - np.isin(self.crop_calendar[:, :, 0], crops_with_varieties), axis=1 - ) + extra_constraint = np.full(self.n, 1, dtype=np.bool_) + + # Set variable which indicates all possible crop options + unique_crop_calendars = np.unique(self.crop_calendar[:, :, 0], axis=0) ( total_profits, profits_no_event, - total_profits_adaptation_option_1, - profits_no_event_adaptation_option_1, - total_profits_adaptation_option_2, - profits_no_event_adaptation_option_2, + total_profits_adaptation, + profits_no_event_adaptation, new_crop_nr, new_farmer_id, - ) = self.profits_SEUT(adaptation_type) + ) = self.profits_SEUT_crops(unique_crop_calendars) total_annual_costs_m2 = ( self.all_loans_annual_cost[:, -1, 0] / self.field_size_per_farmer ) - # Solely the annual cost of the adaptation - annual_cost_m2 = annual_cost - # Construct a dictionary of parameters to pass to the decision module functions decision_params = { "loan_duration": loan_duration, @@ -3105,60 +3273,70 @@ def adapt_crops(self) -> None: "sigma": self.risk_aversion.data, "p_droughts": 1 / self.p_droughts[:-1], "profits_no_event": profits_no_event, + "profits_no_event_adaptation": profits_no_event_adaptation, "total_profits": total_profits, + "total_profits_adaptation": total_profits_adaptation, "risk_perception": self.risk_perception.data, "total_annual_costs": total_annual_costs_m2, - "adaptation_costs": annual_cost_m2, - "adapted": None, - "time_adapted": np.full(self.n, 1), - "T": np.full(self.n, 2), + "adaptation_costs": annual_cost_empty, + "adapted": np.zeros(self.n, dtype=np.int32), + "time_adapted": np.full(self.n, 2), + "T": np.full( + self.n, + 2, + ), "discount_rate": self.discount_rate.data, "extra_constraint": extra_constraint, } - decision_params_1 = copy.deepcopy(decision_params) - decision_params_1.update( - { - "total_profits": total_profits_adaptation_option_1, - "profits_no_event": profits_no_event_adaptation_option_1, - } - ) - - decision_params_2 = copy.deepcopy(decision_params) - decision_params_2.update( - { - "total_profits": total_profits_adaptation_option_2, - "profits_no_event": profits_no_event_adaptation_option_2, - } - ) - # Calculate the EU of not adapting and adapting respectively + # Determine the SEUT of the current crop SEUT_do_nothing = self.decision_module.calcEU_do_nothing(**decision_params) - SEUT_adapt_option_1 = self.decision_module.calcEU_do_nothing( - **decision_params_1 - ) - SEUT_adapt_option_2 = self.decision_module.calcEU_do_nothing( - **decision_params_2 - ) - assert ( - (SEUT_do_nothing != -1).any - or (SEUT_adapt_option_1 != -1).any() - or (SEUT_adapt_option_2 != -1).any() + # Determine the SEUT of the other crop options + SEUT_crop_options = np.full( + (self.n, len(unique_crop_calendars)), 0, dtype=np.float32 ) + for crop_option in range(len(unique_crop_calendars)): + # Determine the cost difference between old and potential new crop + new_crop_nr_option = new_crop_nr[:, crop_option] + cultivation_cost_new_crop = cultivation_cost[ + self.region_id, new_crop_nr_option + ] + cost_difference_adaptation = ( + cultivation_cost_new_crop - cultivation_cost_current_crop + ) + + decision_params_option = copy.deepcopy(decision_params) + decision_params_option.update( + { + "total_profits_adaptation": total_profits_adaptation[ + crop_option, :, : + ], + "profits_no_event_adaptation": profits_no_event_adaptation[ + crop_option, : + ], + "adaptation_costs": cost_difference_adaptation, + } + ) + SEUT_crop_options[:, crop_option] = self.decision_module.calcEU_adapt( + **decision_params_option + ) + + assert np.any(SEUT_do_nothing != -1) or np.any(SEUT_crop_options != -1) # Determine the best adaptation option - best_option_SEUT = np.maximum(SEUT_adapt_option_1, SEUT_adapt_option_2) - chosen_option = (SEUT_adapt_option_1 >= SEUT_adapt_option_2).astype(int) + best_option_SEUT = np.max(SEUT_crop_options, axis=1) + chosen_option = np.argmax(SEUT_crop_options, axis=1) - # Determine for which agents it is beneficial to switch crops - SEUT_adaptation_decision = best_option_SEUT > SEUT_do_nothing + # Determine the crop of the best option + row_indices = np.arange(new_crop_nr.shape[0]) + new_crop_nr_temp = new_crop_nr[row_indices, chosen_option] + new_id_temp = new_farmer_id[row_indices, chosen_option] - # Determine whether the chosen option is the first or second option - new_crop_nr_temp = np.where( - chosen_option == 0, - new_crop_nr[:, 0], - new_crop_nr[:, 1], - ) + # Determine for which agents it is beneficial to switch crops + SEUT_adaptation_decision = ( + (best_option_SEUT > (SEUT_do_nothing)) & (new_id_temp != -1) + ) # Filter out crops chosen due to small diff in do_nothing and adapt SEUT calculation # Adjust the intention threshold based on whether neighbors already have similar crop # Check for each farmer which crops their neighbors are cultivating @@ -3180,63 +3358,24 @@ def adapt_crops(self) -> None: # Set the adaptation mask SEUT_adaptation_decision = SEUT_adaptation_decision & intention_mask - final_chosen_option = chosen_option[SEUT_adaptation_decision] + new_crop_nr_final = new_crop_nr_temp[SEUT_adaptation_decision] + new_id_final = new_id_temp[SEUT_adaptation_decision] print("Crop switching farmers", np.count_nonzero(SEUT_adaptation_decision)) - # Make final selection of which crops the agents will switch to - new_crop_nr_final = np.where( - final_chosen_option == 0, - new_crop_nr[SEUT_adaptation_decision, 0], - new_crop_nr[SEUT_adaptation_decision, 1], - ) - - # Select farmer id from which they will copy the yield/spei relation - new_farmer_id = np.where( - final_chosen_option == 0, - new_farmer_id[SEUT_adaptation_decision, 0], - new_farmer_id[SEUT_adaptation_decision, 1], - ) - assert not np.any(new_crop_nr_final == -1) - # # Assuming self.crop_calendar and extra_constraint are defined - # unique_values_old, counts_old = np.unique( - # self.crop_calendar[extra_constraint, 0, 0], return_counts=True - # ) - - # # Create a formatted string for the old distribution - # old_distribution = ", ".join( - # f"{val}: {cnt}" for val, cnt in zip(unique_values_old, counts_old) - # ) - - # # Print the old distribution - # print("old distribution", "\n", old_distribution) - # Switch their crops and update their yield-SPEI relation - self.crop_calendar[SEUT_adaptation_decision, 0, 0] = new_crop_nr_final - - # unique_values, counts = np.unique( - # self.crop_calendar[extra_constraint, 0, 0], return_counts=True - # ) - - # # Calculate the difference in counts - # difference_counts = counts - counts_old - - # # Create a formatted string for the difference - # difference_distribution = ", ".join( - # f"{val}: {cnt}" for val, cnt in zip(unique_values_old, difference_counts) - # ) - - # # Print the difference - # print("difference", "\n", difference_distribution) + self.crop_calendar[SEUT_adaptation_decision, :, :] = self.crop_calendar[ + new_id_final, :, : + ] # Update yield-SPEI relation self.yearly_yield_ratio[SEUT_adaptation_decision, :] = self.yearly_yield_ratio[ - new_farmer_id, : + new_id_final, : ] self.yearly_SPEI_probability[SEUT_adaptation_decision, :] = ( - self.yearly_SPEI_probability[new_farmer_id, :] + self.yearly_SPEI_probability[new_id_final, :] ) def adapt_irrigation_well( @@ -3282,9 +3421,8 @@ def adapt_irrigation_well( # Reset farmers' status and irrigation type who exceeded the lifespan of their adaptation # and who's wells are much shallower than the groundwater depth expired_adaptations = ( - self.time_adapted[:, adaptation_type] == self.lifespan + self.time_adapted[:, adaptation_type] == self.lifespan_well ) | (groundwater_depth > self.well_depth) - self.adaptation_mechanism[expired_adaptations, adaptation_type] = 0 self.adapted[expired_adaptations, adaptation_type] = 0 self.time_adapted[expired_adaptations, adaptation_type] = -1 self.irrigation_source[expired_adaptations] = self.irrigation_source_key["no"] @@ -3341,39 +3479,18 @@ def adapt_irrigation_well( # Calculate the EU of not adapting and adapting respectively SEUT_do_nothing = self.decision_module.calcEU_do_nothing(**decision_params) - SEUT_adapt = self.decision_module.calcEU_adapt(**decision_params) assert (SEUT_do_nothing != -1).any or (SEUT_adapt != -1).any() - # Compare EU values for those who haven't adapted yet and get boolean results - SEUT_adaptation_decision = SEUT_adapt > SEUT_do_nothing - - # Adjust the intention threshold based on whether neighbors already have similar crop - # Check for each farmer which crops their neighbors are cultivating - social_network_wells = adapted[self.social_network] - - # Check whether adapting agents have adaptation type in their network and create mask - network_has_crop = np.any(social_network_wells == 1, axis=1) - - # Increase intention factor if someone in network has crop - intention_factor_adjusted = self.intention_factor.copy() - intention_factor_adjusted[network_has_crop] += 0.3 - - # Determine whether it passed the intention threshold - random_values = np.random.rand(*intention_factor_adjusted.shape) - intention_mask = random_values < intention_factor_adjusted - - SEUT_adaptation_decision = SEUT_adaptation_decision & intention_mask - - # Update the adaptation status - self.adapted[SEUT_adaptation_decision, adaptation_type] = 1 - - # Reset the timer for newly adapting farmers and update timers for others - self.time_adapted[SEUT_adaptation_decision, adaptation_type] = 0 - self.time_adapted[ - self.time_adapted[:, adaptation_type] != -1, adaptation_type - ] += 1 + SEUT_adaptation_decision = self.update_adaptation_decision( + adaptation_type=adaptation_type, + adapted=adapted, + loan_duration=loan_duration, + annual_cost=annual_cost, + SEUT_do_nothing=SEUT_do_nothing, + SEUT_adapt=SEUT_adapt, + ) # Update irrigation source for farmers who adapted self.irrigation_source[SEUT_adaptation_decision] = self.irrigation_source_key[ @@ -3383,19 +3500,6 @@ def adapt_irrigation_well( # Set their well depth self.well_depth[SEUT_adaptation_decision] = well_depth[SEUT_adaptation_decision] - # Update annual costs and disposable income for adapted farmers - self.all_loans_annual_cost[ - SEUT_adaptation_decision, adaptation_type + 1, 0 - ] += annual_cost[SEUT_adaptation_decision] # For wells specifically - self.all_loans_annual_cost[SEUT_adaptation_decision, -1, 0] += annual_cost[ - SEUT_adaptation_decision - ] # Total loan amount - - # set loan tracker - self.loan_tracker[SEUT_adaptation_decision, adaptation_type + 1, 0] += ( - loan_duration - ) - # Print the percentage of adapted households percentage_adapted = round( np.sum(self.adapted[:, adaptation_type]) @@ -3426,18 +3530,20 @@ def adapt_irrigation_efficiency(self, energy_cost, water_cost) -> None: ]["adaptation_sprinkler"]["loan_duration"] # Placeholder - costs_irrigation_system = 4 * self.field_size_per_farmer + # 0.5 because they first decide to irrigate half their field + costs_irrigation_system = 1 * self.field_size_per_farmer * 0.5 + + # interest_rate = self.get_value_per_farmer_from_region_id( + # self.lending_rate, self.model.current_time + # ) + interest_rate = 0.05 annual_cost = costs_irrigation_system * ( - self.interest_rate - * (1 + self.interest_rate) ** loan_duration - / ((1 + self.interest_rate) ** loan_duration - 1) + interest_rate + * (1 + interest_rate) ** loan_duration + / ((1 + interest_rate) ** loan_duration - 1) ) - # Compute the total annual per square meter costs if farmers adapt during this cycle - # This cost is the cost if the farmer would adapt, plus its current costs of previous - # adaptations - total_annual_costs_m2 = ( annual_cost + self.all_loans_annual_cost[:, -1, 0] ) / self.field_size_per_farmer @@ -3445,16 +3551,20 @@ def adapt_irrigation_efficiency(self, energy_cost, water_cost) -> None: # Solely the annual cost of the adaptation annual_cost_m2 = annual_cost / self.field_size_per_farmer + # Create mask for those who have access to irrigation water + has_irrigation_access = ~np.all( + self.yearly_abstraction_m3_by_farmer[:, 3, :] == 0, axis=1 + ) + # Reset farmers' status and irrigation type who exceeded the lifespan of their adaptation - # and who's wells are much shallower than the groundwater depth - expired_adaptations = self.time_adapted[:, adaptation_type] == self.lifespan - self.adaptation_mechanism[expired_adaptations, adaptation_type] = 0 + # or who's never had access to irrigation water + expired_adaptations = ( + self.time_adapted[:, adaptation_type] == self.lifespan_irrigation + ) | has_irrigation_access self.adapted[expired_adaptations, adaptation_type] = 0 self.time_adapted[expired_adaptations, adaptation_type] = -1 - extra_constraint = np.full(self.n, 1, dtype=bool) - - # To determine the benefit of irrigation, those who have a well are adapted + # To determine the benefit of irrigation, those who have above 90% irrigation efficiency have adapted adapted = np.where((self.adapted[:, adaptation_type] == 1), 1, 0) ( @@ -3465,10 +3575,14 @@ def adapt_irrigation_efficiency(self, energy_cost, water_cost) -> None: ( total_profits, profits_no_event, - ) = self.profits_SEUT(0, adapted) + total_profits_adaptation, + profits_no_event_adaptation, + ) = self.profits_SEUT(adaptation_type, adapted) - total_profits_adaptation = total_profits + energy_diff + water_diff - profits_no_event_adaptation = profits_no_event + energy_diff + water_diff + total_profits_adaptation = total_profits_adaptation + energy_diff + water_diff + profits_no_event_adaptation = ( + profits_no_event_adaptation + energy_diff + water_diff + ) # Construct a dictionary of parameters to pass to the decision module functions decision_params = { @@ -3493,7 +3607,7 @@ def adapt_irrigation_efficiency(self, energy_cost, water_cost) -> None: ]["decision_horizon"], ), "discount_rate": self.discount_rate.data, - "extra_constraint": extra_constraint, + "extra_constraint": has_irrigation_access, } # Calculate the EU of not adapting and adapting respectively @@ -3502,16 +3616,159 @@ def adapt_irrigation_efficiency(self, energy_cost, water_cost) -> None: assert (SEUT_do_nothing != -1).any or (SEUT_adapt != -1).any() - # Compare EU values for those who haven't adapted yet and get boolean results - SEUT_adaptation_decision = SEUT_adapt > SEUT_do_nothing + SEUT_adaptation_decision = self.update_adaptation_decision( + adaptation_type=adaptation_type, + adapted=adapted, + loan_duration=loan_duration, + annual_cost=annual_cost, + SEUT_do_nothing=SEUT_do_nothing, + SEUT_adapt=SEUT_adapt, + ) - social_network_adaptation = adapted[self.social_network] + # Update irrigation efficiency for farmers who adapted + self.irrigation_efficiency[SEUT_adaptation_decision] = 0.9 - # Check whether adapting agents have adaptation type in their network and create mask - network_has_adaptation = np.any(social_network_adaptation == 1, axis=1) + # Print the percentage of adapted households + percentage_adapted = round( + np.sum(self.adapted[:, adaptation_type]) + / len(self.adapted[:, adaptation_type]) + * 100, + 2, + ) + print("Irrigation efficient farms:", percentage_adapted, "(%)") - # Increase intention factor if someone in network has crop - intention_factor_adjusted = self.intention_factor.copy() + def adapt_irrigation_expansion(self, energy_cost, water_cost) -> None: + # Constants + adaptation_type = 3 + + loan_duration = self.model.config["agent_settings"]["farmers"][ + "expected_utility" + ]["adaptation_sprinkler"]["loan_duration"] + + # If the farmers have drip/sprinkler irrigation, they would also have additional costs of expanding that + # Costs are less than the initial expansion + adapted_irr_eff = np.where((self.adapted[:, 2] == 1), 1, 0) + total_costs = np.zeros(self.n, dtype=np.float32) + total_costs[adapted_irr_eff] = 2 * self.field_size_per_farmer * 0.5 + + # interest_rate = self.get_value_per_farmer_from_region_id( + # self.lending_rate, self.model.current_time + # ) + interest_rate = 0.05 + + annual_cost = total_costs * ( + interest_rate + * (1 + interest_rate) ** loan_duration + / ((1 + interest_rate) ** loan_duration - 1) + ) + + # Farmers will have the same yearly water costs added if they expand + annual_cost += energy_cost + annual_cost += water_cost + + # Will also have the input/labor costs doubled + annual_cost += np.sum(self.all_loans_annual_cost[:, 0, :], axis=1) + + total_annual_costs_m2 = ( + annual_cost + self.all_loans_annual_cost[:, -1, 0] + ) / self.field_size_per_farmer + + annual_cost_m2 = annual_cost / self.field_size_per_farmer + + # Create mask for those who have access to irrigation water + has_irrigation_access = np.all( + self.yearly_abstraction_m3_by_farmer[:, 3, :] == 0, axis=1 + ) + + # Reset farmers' status and irrigation type who exceeded the lifespan of their adaptation + # or who's never had access to irrigation water + expired_adaptations = ( + self.time_adapted[:, adaptation_type] == self.lifespan_irrigation + ) | np.all(self.yearly_abstraction_m3_by_farmer[:, 3, :] == 0, axis=1) + self.adapted[expired_adaptations, adaptation_type] = 0 + self.time_adapted[expired_adaptations, adaptation_type] = -1 + + adapted = np.where((self.adapted[:, adaptation_type] == 1), 1, 0) + + ( + total_profits, + profits_no_event, + total_profits_adaptation, + profits_no_event_adaptation, + ) = self.profits_SEUT(adaptation_type, adapted) + + # Construct a dictionary of parameters to pass to the decision module functions + decision_params = { + "loan_duration": loan_duration, + "expenditure_cap": self.expenditure_cap, + "n_agents": self.n, + "sigma": self.risk_aversion.data, + "p_droughts": 1 / self.p_droughts[:-1], + "total_profits_adaptation": total_profits_adaptation, + "profits_no_event": profits_no_event, + "profits_no_event_adaptation": profits_no_event_adaptation, + "total_profits": total_profits, + "risk_perception": self.risk_perception.data, + "total_annual_costs": total_annual_costs_m2, + "adaptation_costs": annual_cost_m2, + "adapted": adapted, + "time_adapted": self.time_adapted[:, adaptation_type], + "T": np.full( + self.n, + self.model.config["agent_settings"]["farmers"]["expected_utility"][ + "adaptation_sprinkler" + ]["decision_horizon"], + ), + "discount_rate": self.discount_rate.data, + "extra_constraint": has_irrigation_access, + } + + # Calculate the EU of not adapting and adapting respectively + SEUT_do_nothing = self.decision_module.calcEU_do_nothing(**decision_params) + SEUT_adapt = self.decision_module.calcEU_adapt(**decision_params) + + assert (SEUT_do_nothing != -1).any or (SEUT_adapt != -1).any() + + SEUT_adaptation_decision = self.update_adaptation_decision( + adaptation_type=adaptation_type, + adapted=adapted, + loan_duration=loan_duration, + annual_cost=annual_cost, + SEUT_do_nothing=SEUT_do_nothing, + SEUT_adapt=SEUT_adapt, + ) + + # Update irrigation efficiency for farmers who adapted + self.fraction_irrigated_field[SEUT_adaptation_decision] = 1 + + # Print the percentage of adapted households + percentage_adapted = round( + np.sum(self.adapted[:, adaptation_type]) + / len(self.adapted[:, adaptation_type]) + * 100, + 2, + ) + print("Irrigation expanded farms:", percentage_adapted, "(%)") + + def update_adaptation_decision( + self, + adaptation_type, + adapted, + loan_duration, + annual_cost, + SEUT_do_nothing, + SEUT_adapt, + ): + # Compare EU values for those who haven't adapted yet and get boolean results + SEUT_adaptation_decision = SEUT_adapt > SEUT_do_nothing + + social_network_adaptation = adapted[self.social_network] + + # Check whether adapting agents have adaptation type in their network and create mask + network_has_adaptation = np.any(social_network_adaptation == 1, axis=1) + + # Increase intention factor if someone in network has crop + intention_factor_adjusted = self.intention_factor.copy() intention_factor_adjusted[network_has_adaptation] += 0.3 # Determine whether it passed the intention threshold @@ -3529,15 +3786,6 @@ def adapt_irrigation_efficiency(self, energy_cost, water_cost) -> None: self.time_adapted[:, adaptation_type] != -1, adaptation_type ] += 1 - # Reset the timer for newly adapting farmers and update timers for others - self.time_adapted[SEUT_adaptation_decision, adaptation_type] = 0 - self.time_adapted[ - self.time_adapted[:, adaptation_type] != -1, adaptation_type - ] += 1 - - # Update irrigation efficiency for farmers who adapted - self.irrigation_efficiency[SEUT_adaptation_decision] = 0.9 - # Update annual costs and disposable income for adapted farmers self.all_loans_annual_cost[ SEUT_adaptation_decision, adaptation_type + 1, 0 @@ -3551,14 +3799,7 @@ def adapt_irrigation_efficiency(self, energy_cost, water_cost) -> None: loan_duration ) - # Print the percentage of adapted households - percentage_adapted = round( - np.sum(self.adapted[:, adaptation_type]) - / len(self.adapted[:, adaptation_type]) - * 100, - 2, - ) - print("Irrigation efficient farms:", percentage_adapted, "(%)") + return SEUT_adaptation_decision def calculate_water_costs(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ @@ -3698,9 +3939,9 @@ def calculate_water_costs(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: # Update loan records with the annual cost of water and energy for i in range(4): # Find the first available loan slot - if np.all(self.all_loans_annual_cost.data[:, 4, i] == 0): - self.all_loans_annual_cost.data[:, 4, i] = annual_cost_water_energy - self.loan_tracker[annual_cost_water_energy > 0, 4, i] = loan_duration + if np.all(self.all_loans_annual_cost.data[:, 5, i] == 0): + self.all_loans_annual_cost.data[:, 5, i] = annual_cost_water_energy + self.loan_tracker[annual_cost_water_energy > 0, 5, i] = loan_duration break # Add the annual cost to the total loan annual costs @@ -3793,7 +4034,8 @@ def calculate_well_costs_global( # Calculate annuity factor for loan repayment using the annuity formula # A = P * [r(1+r)^n] / [(1+r)^n -1], where: # A = annual payment, P = principal amount (install_cost), r = interest rate, n = loan duration - interest_rate = self.interest_rate + interest_rate = 0.05 + n = loan_duration annuity_factor = (interest_rate * (1 + interest_rate) ** n) / ( (1 + interest_rate) ** n - 1 @@ -3809,138 +4051,83 @@ def profits_SEUT( ) -> Union[ Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], - Tuple[ - np.ndarray, - np.ndarray, - np.ndarray, - np.ndarray, - np.ndarray, - np.ndarray, - np.ndarray, - np.ndarray, - ], ]: """ - Calculate total profits under different drought probability scenarios, with and without adaptation measures. - - This method computes the profits for agents under various drought probabilities, considering the potential - impacts of different adaptation strategies (e.g., installing wells or changing crops). It returns the profits - both with and without adaptation, depending on the adaptation type specified. + Calculate total profits under different drought probability scenarios, with and without adaptation measures + for adaptation types 0 and 1. Parameters: adaptation_type (int): The type of adaptation to consider. - 0: No adaptation. - 1: Adaptation Type 1 (e.g., installing wells). - - Other: Other adaptation types (e.g., changing crops). adapted (np.ndarray, optional): An array indicating which agents have adapted (relevant for adaptation_type == 1). Returns: Depending on adaptation_type, returns a tuple containing total profits and profits under 'no drought' scenario, - possibly including adaptation profits and additional data. + possibly including adaptation profits. """ - def compute_adaptation_gains(yield_ratios: np.ndarray) -> Dict[str, Any]: - """ - Compute adaptation gains based on the adaptation type. - - Parameters: - yield_ratios (np.ndarray): Original yield ratios without adaptation. - - Returns: - Dict[str, Any]: Dictionary containing adaptation gains and additional data. - """ - adaptation_data = {} - if adaptation_type == 1: - # Adaptation Type 1: e.g., installing wells - gains_adaptation = self.adaptation_yield_ratio_difference( - adapted, yield_ratios - ) - adaptation_data["gains_adaptation"] = gains_adaptation - else: - # Other Adaptation Types: e.g., changing crops - ( - gains_option_1, - gains_option_2, - new_crop_nr, - new_farmer_id, - ) = self.crop_yield_ratio_difference(yield_ratios) - adaptation_data.update( - { - "gains_option_1": gains_option_1, - "gains_option_2": gains_option_2, - "new_crop_nr": new_crop_nr, - "new_farmer_id": new_farmer_id, - } - ) - return adaptation_data - - def adjust_yield_ratios_with_adaptation( - yield_ratios: np.ndarray, adaptation_data: Dict[str, Any] - ) -> Dict[str, np.ndarray]: - """ - Adjust yield ratios by adding adaptation gains and clipping values between 0 and 1. - - Parameters: - yield_ratios (np.ndarray): Original yield ratios. - adaptation_data (Dict[str, Any]): Adaptation gains data. - - Returns: - Dict[str, np.ndarray]: Dictionary containing adjusted yield ratios. - """ - adjusted_yield_ratios = {} - if adaptation_type == 1: - yield_ratios_adaptation = np.clip( - yield_ratios + adaptation_data["gains_adaptation"], 0, 1 - ) - adjusted_yield_ratios["yield_ratios_adaptation"] = ( - yield_ratios_adaptation - ) - else: - yield_ratios_adaptation_option_1 = np.clip( - yield_ratios + adaptation_data["gains_option_1"], 0, 1 - ) - yield_ratios_adaptation_option_2 = np.clip( - yield_ratios + adaptation_data["gains_option_2"], 0, 1 - ) - adjusted_yield_ratios.update( - { - "yield_ratios_adaptation_option_1": yield_ratios_adaptation_option_1, - "yield_ratios_adaptation_option_2": yield_ratios_adaptation_option_2, - } - ) - return adjusted_yield_ratios - - def compute_total_profits( - yield_ratios: np.ndarray, crops_mask: np.ndarray, nan_array: np.ndarray - ) -> np.ndarray: - """ - Compute total profits for all agents across different drought scenarios. - - Parameters: - yield_ratios (np.ndarray): Yield ratios for agents under different drought scenarios. - crops_mask (np.ndarray): Mask indicating valid crop entries. - nan_array (np.ndarray): Array filled with NaNs for reference. - - Returns: - np.ndarray: Total profits for agents under each drought scenario. - """ - total_profits = np.zeros((self.n, yield_ratios.shape[1])) - for col in range(yield_ratios.shape[1]): - total_profits[:, col] = self.yield_ratio_to_profit( - yield_ratios[:, col], crops_mask, nan_array - ) - return total_profits + # Main function logic + yield_ratios = self.convert_probability_to_yield_ratio() - def format_results(total_profits: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: - """ - Transpose and slice the total profits matrix, and extract the 'no drought' scenario profits. + # Create a mask for valid crops (exclude non-crop values) + crops_mask = (self.crop_calendar[:, :, 0] >= 0) & ( + self.crop_calendar[:, :, 0] < len(self.crop_data["reference_yield_kg_m2"]) + ) - """ - total_profits = total_profits.T - profits_no_event = total_profits[-1, :] - total_profits = total_profits[:-1, :] + # Create an array filled with NaNs for reference data + nan_array = np.full_like( + self.crop_calendar[:, :, 0], fill_value=np.nan, dtype=float + ) + + # Compute profits without adaptation + total_profits = self.compute_total_profits(yield_ratios, crops_mask, nan_array) + total_profits, profits_no_event = self.format_results(total_profits) + + # Handle adaptation types 0 and 1 + if adaptation_type != 0: + # Adaptation Type 1: e.g., installing wells + gains_adaptation = self.adaptation_yield_ratio_difference( + adapted, yield_ratios + ) + # Adjust yield ratios with adaptation gains + yield_ratios_adaptation = np.clip(yield_ratios + gains_adaptation, 0, 1) + # Compute profits with adaptation + total_profits_adaptation = self.compute_total_profits( + yield_ratios_adaptation, crops_mask, nan_array + ) + total_profits_adaptation, profits_no_event_adaptation = self.format_results( + total_profits_adaptation + ) + return ( + total_profits, + profits_no_event, + total_profits_adaptation, + profits_no_event_adaptation, + ) + else: + # No adaptation (adaptation_type == 0) return total_profits, profits_no_event + def profits_SEUT_crops( + self, unique_crop_calendars + ) -> Tuple[ + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + np.ndarray, + ]: + """ + Calculate total profits under different drought probability scenarios with crop adaptation measures (Adaptation Type 2). + + Returns: + Tuple containing profits without adaptation, profits with adaptation options, and additional data. + """ + # Main function logic yield_ratios = self.convert_probability_to_yield_ratio() @@ -3955,66 +4142,110 @@ def format_results(total_profits: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: ) # Compute profits without adaptation - total_profits = compute_total_profits(yield_ratios, crops_mask, nan_array) - total_profits, profits_no_event = format_results(total_profits) + total_profits = self.compute_total_profits(yield_ratios, crops_mask, nan_array) + total_profits, profits_no_event = self.format_results(total_profits) - # Initialize adaptation data - if adaptation_type != 0: - adaptation_data = compute_adaptation_gains(yield_ratios) - adjusted_yield_ratios = adjust_yield_ratios_with_adaptation( - yield_ratios, adaptation_data + crop_elevation_group = self.create_unique_groups() + + unique_crop_groups, group_indices = np.unique( + crop_elevation_group, axis=0, return_inverse=True + ) + + # Calculate the yield gains for crop switching for different farmers + ( + yield_gains, + new_crop_nr, + new_farmer_id, + ) = crop_yield_ratio_difference_test_njit( + yield_ratios=yield_ratios, + crop_elevation_group=crop_elevation_group, + unique_crop_groups=unique_crop_groups, + group_indices=group_indices, + crop_calendar=self.crop_calendar.data, + unique_crop_calendars=unique_crop_calendars, + p_droughts=self.p_droughts, + ) + + total_profits_adaptation = np.full( + (len(unique_crop_calendars), len(self.p_droughts[:-1]), self.n), + 0.0, + dtype=np.float32, + ) + profits_no_event_adaptation = np.full( + (len(unique_crop_calendars), self.n), + 0.0, + dtype=np.float32, + ) + + for crop_option in range(len(unique_crop_calendars)): + yield_gains_option = yield_gains[:, crop_option, :] + # Adjust yield ratios with adaptation gains + yield_ratios_adaptation_option = np.clip( + yield_ratios + yield_gains_option, 0, 1 ) - if adaptation_type == 1: - # Adaptation Type 1 - total_profits_adaptation = compute_total_profits( - adjusted_yield_ratios["yield_ratios_adaptation"], - crops_mask, - nan_array, - ) - total_profits_adaptation, profits_no_event_adaptation = format_results( - total_profits_adaptation - ) - return ( - total_profits, - profits_no_event, - total_profits_adaptation, - profits_no_event_adaptation, - ) - else: - # Other Adaptation Types - total_profits_adaptation_option_1 = compute_total_profits( - adjusted_yield_ratios["yield_ratios_adaptation_option_1"], - crops_mask, - nan_array, - ) - ( - total_profits_adaptation_option_1, - profits_no_event_adaptation_option_1, - ) = format_results(total_profits_adaptation_option_1) - - total_profits_adaptation_option_2 = compute_total_profits( - adjusted_yield_ratios["yield_ratios_adaptation_option_2"], - crops_mask, - nan_array, - ) - ( - total_profits_adaptation_option_2, - profits_no_event_adaptation_option_2, - ) = format_results(total_profits_adaptation_option_2) - - return ( - total_profits, - profits_no_event, - total_profits_adaptation_option_1, - profits_no_event_adaptation_option_1, - total_profits_adaptation_option_2, - profits_no_event_adaptation_option_2, - adaptation_data["new_crop_nr"], - adaptation_data["new_farmer_id"], - ) - else: - return total_profits, profits_no_event + # Compute profits with adaptation options + total_profits_adaptation_option = self.compute_total_profits( + yield_ratios_adaptation_option, crops_mask, nan_array + ) + ( + total_profits_adaptation_option, + profits_no_event_adaptation_option, + ) = self.format_results(total_profits_adaptation_option) + + total_profits_adaptation[crop_option, :, :] = ( + total_profits_adaptation_option + ) + profits_no_event_adaptation[crop_option, :] = ( + profits_no_event_adaptation_option + ) + + return ( + total_profits, + profits_no_event, + total_profits_adaptation, + profits_no_event_adaptation, + new_crop_nr, + new_farmer_id, + ) + + def compute_total_profits( + self, yield_ratios: np.ndarray, crops_mask: np.ndarray, nan_array: np.ndarray + ) -> np.ndarray: + """ + Compute total profits for all agents across different drought scenarios. + + Parameters: + yield_ratios (np.ndarray): Yield ratios for agents under different drought scenarios. + crops_mask (np.ndarray): Mask indicating valid crop entries. + nan_array (np.ndarray): Array filled with NaNs for reference. + + Returns: + np.ndarray: Total profits for agents under each drought scenario. + """ + total_profits = np.zeros((self.n, yield_ratios.shape[1])) + for col in range(yield_ratios.shape[1]): + total_profits[:, col] = self.yield_ratio_to_profit( + yield_ratios[:, col], crops_mask, nan_array + ) + return total_profits + + def format_results( + self, total_profits: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray]: + """ + Transpose and slice the total profits matrix, and extract the 'no drought' scenario profits. + + Parameters: + total_profits (np.ndarray): Total profits matrix. + + Returns: + Tuple[np.ndarray, np.ndarray]: Transposed profits and 'no drought' scenario profits. + """ + total_profits = total_profits.T + profits_no_event = total_profits[-1, :] + total_profits = total_profits[:-1, :] + return total_profits, profits_no_event def convert_probability_to_yield_ratio(self) -> np.ndarray: """ @@ -4035,16 +4266,28 @@ def logarithmic_function(probability, params): a = params[:, 0] b = params[:, 1] x = probability[:, np.newaxis] - return a * np.log10(x) + b - yield_ratios = logarithmic_function( + def exponential_function(probability, params): + # Extract parameters + a = params[:, 0] # Shape: (num_agents,) + b = params[:, 1] # Shape: (num_agents,) + x = probability # Shape: (num_events,) + + # Reshape arrays for broadcasting + a = a[:, np.newaxis] # Shape: (num_agents, 1) + b = b[:, np.newaxis] # Shape: (num_agents, 1) + x = x[np.newaxis, :] # Shape: (1, num_events) + + # Compute the exponential function + return a * np.exp(b * x) # Shape: (num_agents, num_events) + + yield_ratios = exponential_function( 1 / self.p_droughts, self.farmer_yield_probability_relation - ).T + ) # Adjust the yield ratios to lie between 0 and 1 - yield_ratios[yield_ratios < 0] = 0 # Ensure non-negative yield ratios - yield_ratios[yield_ratios > 1] = 1 # Cap the yield ratios at 1 + yield_ratios = np.clip(yield_ratios, 0, 1) # Store the results in a global variable self.yield_ratios_drought_event = yield_ratios[:] @@ -4065,7 +4308,7 @@ def create_unique_groups(self, N=5): percentiles = [100 * i / N for i in range(1, N)] basin_elevation_thresholds = np.percentile(self.elevation.data, percentiles) - # Use np.digitize to assign group labels + # assign group labels distribution_array = np.digitize( self.elevation.data, bins=basin_elevation_thresholds, right=False ) @@ -4081,205 +4324,6 @@ def create_unique_groups(self, N=5): return crop_elevation_group.astype(np.int32) - def crop_yield_ratio_difference(self, yield_ratios) -> np.ndarray: - """ - Calculate the relative yield ratio improvement for farmers adopting a certain adaptation. - - This function determines how much better farmers that have adopted a particular adaptation - are doing in terms of their yield ratio as compared to those who haven't. - - Args: - adaptation_type: The type of adaptation being considered. - - Returns: - An array representing the relative yield ratio improvement for each agent. - - TO DO: vectorize - """ - - # Make array based on elevation and crop calendar - crop_elevation_group = self.create_unique_groups() - - # Get unique groups and group indices - unique_crop_groups, group_indices = np.unique( - crop_elevation_group, axis=0, return_inverse=True - ) - # Initialize array to store relative yield ratio improvement for unique groups - unique_yield_ratio_gain_option_1 = np.full( - (len(unique_crop_groups), len(self.p_droughts)), 0, dtype=np.float32 - ) - unique_yield_ratio_gain_option_2 = np.full( - (len(unique_crop_groups), len(self.p_droughts)), 0, dtype=np.float32 - ) - id_to_switch_to = np.full((len(unique_crop_groups), 2), -1) - crop_to_switch_to = np.full((len(unique_crop_groups), 2), -1) - - def get_updated_combinations(unique_combination): - """ - Update combinations based on the value of unique_combination[0]. - - Parameters: - unique_combination (list): List containing the unique combination values. - - Returns: - tuple: Updated combinations (unique_combination_option_1, unique_combination_option_2). - """ - # Define the mapping of initial values to their corresponding options - condition_mapping = { - 0: (26, 27), - 26: (0, 27), - 27: (0, 26), - 1: (28, 29), - 28: (1, 29), - 29: (1, 28), - 2: (30, 31), - 30: (2, 31), - 31: (2, 30), - 7: (32, 33), - 32: (7, 33), - 33: (7, 32), - 9: (34, 35), - 34: (9, 35), - 35: (9, 34), - } - - # Initialize the options with the original unique_combination values - unique_combination_option_1 = unique_combination.copy() - unique_combination_option_2 = unique_combination.copy() - - # Check if the initial value is in the mapping - if unique_combination[0] in condition_mapping: - option_1_value, option_2_value = condition_mapping[ - unique_combination[0] - ] - unique_combination_option_1[0] = option_1_value - unique_combination_option_2[0] = option_2_value - - return unique_combination_option_1, unique_combination_option_2 - - crops_with_varieties = [0, 26, 27, 1, 28, 29, 2, 30, 31, 7, 32, 33, 9, 34, 35] - - # Loop over each unique group of farmers to determine their average yield ratio - for idx, unique_combination in enumerate(unique_crop_groups): - if unique_combination[0] in crops_with_varieties: - unique_farmer_groups = ( - crop_elevation_group == unique_combination[None, ...] - ).all(axis=1) - - # Identify the adapted counterpart of the current group - unique_combination_option_1 = unique_combination.copy() - unique_combination_option_2 = unique_combination.copy() - - unique_combination_option_1, unique_combination_option_2 = ( - get_updated_combinations(unique_combination) - ) - - unique_farmer_groups_option_1 = ( - crop_elevation_group == unique_combination_option_1[None, ...] - ).all(axis=1) - - unique_farmer_groups_option_2 = ( - crop_elevation_group == unique_combination_option_2[None, ...] - ).all(axis=1) - - def find_most_similar_index(target_series, yield_ratios, groups): - distances = np.linalg.norm( - yield_ratios[groups] - target_series, axis=1 - ) - most_similar_index_within_subset = np.argmin(distances) - global_indices = np.where(groups)[0] - return global_indices[most_similar_index_within_subset] - - def calculate_yield_ratio_gain( - yield_ratios, current_yield_ratio, groups - ): - yield_ratio = np.mean(yield_ratios[groups, :], axis=0) - yield_ratio_gain = yield_ratio - current_yield_ratio - return yield_ratio_gain - - def process_option( - yield_ratios, - unique_combination_option, - groups, - current_yield_ratio, - crop_to_switch_to, - id_to_switch_to, - option_nr, - idx, - ): - yield_ratio_gain = calculate_yield_ratio_gain( - yield_ratios, current_yield_ratio, groups - ) - crop_to_switch_to[idx, option_nr] = unique_combination_option[0] - - if np.isnan(yield_ratio_gain).all(): - yield_ratio_gain = np.zeros_like(yield_ratio_gain) - else: - id_to_switch_to[idx, option_nr] = find_most_similar_index( - yield_ratio_gain, yield_ratios, groups - ) - return yield_ratio_gain - - # Calculate mean yield ratio over past years for the unadapted group - current_yield_ratio = np.mean( - yield_ratios[unique_farmer_groups, :], axis=0 - ) - - if np.isnan(current_yield_ratio).all(): - current_yield_ratio = np.zeros_like(current_yield_ratio) - - # Process option 1 - yield_ratio_gain_relative_option_1 = process_option( - yield_ratios, - unique_combination_option_1, - unique_farmer_groups_option_1, - current_yield_ratio, - crop_to_switch_to, - id_to_switch_to, - 0, - idx, - ) - - # Process option 2 - yield_ratio_gain_relative_option_2 = process_option( - yield_ratios, - unique_combination_option_2, - unique_farmer_groups_option_2, - current_yield_ratio, - crop_to_switch_to, - id_to_switch_to, - 1, - idx, - ) - - unique_yield_ratio_gain_option_1[idx, :] = ( - yield_ratio_gain_relative_option_1 - ) - unique_yield_ratio_gain_option_2[idx, :] = ( - yield_ratio_gain_relative_option_2 - ) - - assert not ( - np.isinf(yield_ratio_gain_relative_option_1).any() - or np.isinf(yield_ratio_gain_relative_option_2).any() - ), "gains adaptation value is inf" - - else: - pass - - # Convert group-based results into agent-specific results - gains_adaptation_option_1 = unique_yield_ratio_gain_option_1[group_indices, :] - gains_adaptation_option_2 = unique_yield_ratio_gain_option_2[group_indices, :] - new_crop_nr = crop_to_switch_to[group_indices, :] - new_farmer_id = id_to_switch_to[group_indices, :] - - return ( - gains_adaptation_option_1, - gains_adaptation_option_2, - new_crop_nr, - new_farmer_id, - ) - def adaptation_yield_ratio_difference( self, adapted: np.ndarray, yield_ratios ) -> np.ndarray: @@ -4695,11 +4739,13 @@ def step(self) -> None: np.mean(self.yearly_yield_ratio[:, 1]), ) + timer = TimingModule("crop_farmers") + energy_cost, water_cost, average_extraction_speed = ( self.calculate_water_costs() ) - timer = TimingModule("crop_farmers") + timer.new_split("water & energy costs") if ( not self.model.spinup @@ -4707,8 +4753,8 @@ def step(self) -> None: and not self.config["ruleset"] == "no-adaptation" ): # Determine the relation between drought probability and yield - self.calculate_yield_spei_relation() - # self.calculate_yield_spei_relation_group() + # self.calculate_yield_spei_relation() + self.calculate_yield_spei_relation_group() # self.calculate_yield_spei_relation_test_group() timer.new_split("yield-spei relation") @@ -4732,6 +4778,14 @@ def step(self) -> None: ): self.adapt_irrigation_efficiency(energy_cost, water_cost) timer.new_split("irr efficiency") + if ( + not self.config["expected_utility"][ + "adaptation_irrigation_expansion" + ]["ruleset"] + == "no-adaptation" + ): + self.adapt_irrigation_expansion(energy_cost, water_cost) + timer.new_split("irr expansion") if ( not self.config["expected_utility"]["crop_switching"]["ruleset"] == "no-adaptation" @@ -4877,7 +4931,6 @@ def add_agent( "yearly_potential_profits": 1, "farmer_yield_probability_relation": 1, "irrigation_efficiency": 0.9, - "yield_ratio_multiplier": 1, "base_management_yield_ratio": 1, "yield_ratio_management": 1, "annual_costs_all_adaptations": 1, diff --git a/geb/agents/households.py b/geb/agents/households.py index 5df394d2..d867fe4b 100644 --- a/geb/agents/households.py +++ b/geb/agents/households.py @@ -409,7 +409,7 @@ def water_demand(self): return self.current_water_demand, self.current_efficiency def step(self) -> None: - self.risk_perception *= self.risk_perception + # self.risk_perception *= self.risk_perception return None @property diff --git a/geb/calibrate.py b/geb/calibrate.py index f7d3c073..5c742408 100644 --- a/geb/calibrate.py +++ b/geb/calibrate.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - +# fmt: off """ Calibration tool for Hydrological models using a distributed evolutionary algorithms in python @@ -13,7 +13,6 @@ Thanks Hylke for making it available for use and modification Modified by Peter Burek and Jens de Bruijn """ - import os import shutil import array @@ -27,15 +26,14 @@ import geopandas as gpd from deap import creator, base, tools, algorithms from functools import wraps, partial +import json +import sys import multiprocessing from subprocess import Popen, PIPE import pickle -SCENARIO = "adaptation" - - def KGE_calculation(s, o): """ Kling Gupta Efficiency (Kling et al., 2012, http://dx.doi.org/10.1016/j.jhydrol.2012.01.011) @@ -47,57 +45,44 @@ def KGE_calculation(s, o): """ B = np.mean(s) / np.mean(o) y = (np.std(s) / np.mean(s)) / (np.std(o) / np.mean(o)) - r = np.corrcoef(o, s)[0, 1] + r = np.corrcoef(o, s)[0,1] kge = 1 - np.sqrt((r - 1) ** 2 + (B - 1) ** 2 + (y - 1) ** 2) return kge - def get_observed_well_ratio(config): - observed_irrigation_sources = gpd.read_file( - os.path.join( - config["general"]["original_data"], - "census", - "output", - "irrigation_source_2010-2011.geojson", - ) - ).to_crs(3857) - simulated_subdistricts = gpd.read_file( - os.path.join(config["general"]["input_folder"], "areamaps", "regions.geojson") - ) - # set index to unique ID combination of state, district and subdistrict - observed_irrigation_sources.set_index( - ["state_code", "district_c", "sub_distri"], inplace=True - ) - simulated_subdistricts.set_index( - ["state_code", "district_c", "sub_distri"], inplace=True - ) - # select rows from observed_irrigation_sources where the index is in simulated_subdistricts - observed_irrigation_sources = observed_irrigation_sources.loc[ - simulated_subdistricts.index - ] - - region_mask = gpd.read_file( - os.path.join(config["general"]["input_folder"], "areamaps", "region.geojson") - ).to_crs(3857) - assert len(region_mask) == 1 - # get overlapping areas of observed_irrigation_sources and region_mask - observed_irrigation_sources["area_in_region_mask"] = ( - gpd.overlay(observed_irrigation_sources, region_mask, how="intersection").area - / observed_irrigation_sources.area.values - ).values - - observed_irrigation_sources = observed_irrigation_sources.join( - simulated_subdistricts["region_id"] - ) - observed_irrigation_sources.set_index("region_id", inplace=True) - + observed_irrigation_sources = gpd.read_file(os.path.join(config['general']['original_data'], 'census', 'output', 'irrigation_source_2010-2011.geojson')).to_crs(3857) + simulated_subdistricts = gpd.read_file(os.path.join(config['general']['input_folder'], 'areamaps', 'regions.geojson')) + # set index to unique ID combination of state, district and subdistrict + observed_irrigation_sources.set_index(['state_code', 'district_c', 'sub_distri'], inplace=True) + simulated_subdistricts.set_index(['state_code', 'district_c', 'sub_distri'], inplace=True) + # select rows from observed_irrigation_sources where the index is in simulated_subdistricts + observed_irrigation_sources = observed_irrigation_sources.loc[simulated_subdistricts.index] + + region_mask = gpd.read_file(os.path.join(config['general']['input_folder'], 'areamaps', 'region.geojson')).to_crs(3857) + assert len(region_mask) == 1 + # get overlapping areas of observed_irrigation_sources and region_mask + observed_irrigation_sources['area_in_region_mask'] = (gpd.overlay(observed_irrigation_sources, region_mask, how='intersection').area / observed_irrigation_sources.area.values).values + + # ANALYSIS_THRESHOLD = 0.5 + + # observed_irrigation_sources = observed_irrigation_sources[observed_irrigation_sources['area_in_region_mask'] > ANALYSIS_THRESHOLD] + observed_irrigation_sources = observed_irrigation_sources.join(simulated_subdistricts['region_id']) + observed_irrigation_sources.set_index('region_id', inplace=True) + + total_holdings_observed = observed_irrigation_sources[[c for c in observed_irrigation_sources.columns if c.endswith('total_holdings')]].sum(axis=1) + total_holdings_with_well_observed = ( + observed_irrigation_sources[[c for c in observed_irrigation_sources.columns if c.endswith('well_holdings')]].sum(axis=1) + + observed_irrigation_sources[[c for c in observed_irrigation_sources.columns if c.endswith('tubewell_holdings')]].sum(axis=1) + ) + ratio_holdings_with_well_observed = total_holdings_with_well_observed / total_holdings_observed + return ratio_holdings_with_well_observed def handle_ctrl_c(func): @wraps(func) def wrapper(*args, **kwargs): global ctrl_c_entered if not ctrl_c_entered: - signal.signal(signal.SIGINT, default_sigint_handler) # the default + signal.signal(signal.SIGINT, default_sigint_handler) # the default try: return func(*args, **kwargs) except KeyboardInterrupt: @@ -107,880 +92,669 @@ def wrapper(*args, **kwargs): signal.signal(signal.SIGINT, pool_ctrl_c_handler) else: return KeyboardInterrupt - return wrapper - def pool_ctrl_c_handler(*args, **kwargs): global ctrl_c_entered ctrl_c_entered = True - def multi_set(dict_obj, value, *attrs): - d = dict_obj - for attr in attrs[:-1]: - d = d[attr] - if attrs[-1] not in d: - raise KeyError(f"Key {attrs} does not exist in config file.") - d[attrs[-1]] = value - + d = dict_obj + for attr in attrs[:-1]: + d = d[attr] + if attrs[-1] not in d: + raise KeyError(f"Key {attrs} does not exist in config file.") + d[attrs[-1]] = value def get_irrigation_wells_score(run_directory, individual, config): - # TODO: Fix this function - pass - # regions = np.load( - # os.path.join( - # run_directory, - # config["calibration"]["scenario"], - # "region_id", - # "20110101T000000.npz", - # ) - # )["data"] - # field_size = np.load( - # os.path.join( - # run_directory, - # config["calibration"]["scenario"], - # "field_size", - # "20110101T000000.npz", - # ) - # )["data"] - # irrigation_source = np.load( - # os.path.join( - # run_directory, - # config["calibration"]["scenario"], - # "irrigation_source", - # "20110101T000000.npz", - # ) - # )["data"] - - # well_irrigated = np.isin( - # irrigation_source, - # [irrigation_source_key["well"], irrigation_source_key["tubewell"]], - # ) - # # Calculate the ratio of farmers with a well per tehsil - # farmers_per_region = np.bincount(regions) - # well_irrigated_per_tehsil = np.bincount(regions, weights=well_irrigated) - # minimum_farmer_mask = np.where(farmers_per_region > 100) - # ratio_well_irrigated = ( - # well_irrigated_per_tehsil[minimum_farmer_mask] - # / farmers_per_region[minimum_farmer_mask] - # ) - - # well_irrigated = np.isin( - # irrigation_source, - # [irrigation_source_key["well"], irrigation_source_key["tubewell"]], - # ) - # # Calculate the ratio of farmers with a well per tehsil - # farmers_per_region = np.bincount(regions) - # well_irrigated_per_tehsil = np.bincount(regions, weights=well_irrigated) - # ratio_well_irrigated = well_irrigated_per_tehsil / farmers_per_region - - # ratio_holdings_with_well_observed = ratio_holdings_with_well_observed[ - # minimum_farmer_mask[0] - # ].values - # ratio_holdings_with_well_simulated = ratio_well_irrigated - - # minimum_well_mask = np.where(ratio_holdings_with_well_observed > 0.01) - - # irrigation_well_score = 1 - abs( - # ( - # (ratio_holdings_with_well_simulated - ratio_holdings_with_well_observed) - # / ratio_holdings_with_well_observed - # ) - # ) - - # total_farmers = farmers_per_region.sum() - # farmers_fraction = farmers_per_region[minimum_farmer_mask] / total_farmers - - # irrigation_well_score = float( - # np.sum( - # irrigation_well_score[minimum_well_mask] - # * farmers_fraction[minimum_well_mask] - # ) - # ) - # print( - # "run_id: " - # + str(individual.label) - # + ", IWS: " - # + "{0:.3f}".format(irrigation_well_score) - # ) - # with open( - # os.path.join(config["calibration"]["path"], "IWS_log.csv"), "a" - # ) as myfile: - # myfile.write(str(individual.label) + "," + str(irrigation_well_score) + "\n") - + regions = np.load(os.path.join(run_directory, config['calibration']['scenario'], 'region_id', '20110101T000000.npz'))['data'] + # field_size = np.load(os.path.join(run_directory, config['calibration']['scenario'], 'field_size', '20110101T000000.npz'))['data'] + irrigation_source = np.load(os.path.join(run_directory, config['calibration']['scenario'], 'irrigation_source', '20110101T000000.npz'))['data'] + + with open(os.path.join(config['general']['input_folder'], 'agents', 'farmers' , 'irrigation_sources.json')) as f: + irrigation_source_key = json.load(f) -def get_KGE_discharge(run_directory, individual, config, gauges, observed_streamflow): - def get_streamflows(gauge, observed_streamflow): - # Get the path of the simulated streamflow file - Qsim_tss = os.path.join(run_directory, SCENARIO, f"{gauge[0]} {gauge[1]}.csv") - # os.path.join(run_directory, 'base/discharge.csv') - - # Check if the simulated streamflow file exists - if not os.path.isfile(Qsim_tss): - print("run_id: " + str(individual.label) + " File: " + Qsim_tss) - raise Exception( - "No simulated streamflow found. Is the data exported in the ini-file (e.g., 'OUT_TSS_Daily = var.discharge'). Probably the model failed to start? Check the log files of the run!" - ) - - # Read the simulated streamflow data from the file - simulated_streamflow = pd.read_csv( - Qsim_tss, sep=",", parse_dates=True, index_col=0 - ) + well_irrigated = np.isin(irrigation_source, [irrigation_source_key['well'], irrigation_source_key['tubewell']]) + # Calculate the ratio of farmers with a well per tehsil + farmers_per_region = np.bincount(regions) + well_irrigated_per_tehsil = np.bincount(regions, weights=well_irrigated) + minimum_farmer_mask = np.where(farmers_per_region > 100) + ratio_well_irrigated = well_irrigated_per_tehsil[minimum_farmer_mask] / farmers_per_region[minimum_farmer_mask] - # parse the dates in the index - # simulated_streamflow.index = pd.date_range(config['calibration']['start_time'] + timedelta(days=1), config['calibration']['end_time']) + ratio_holdings_with_well_observed = get_observed_well_ratio(config) - simulated_streamflow_gauge = simulated_streamflow[" ".join(map(str, gauge))] - simulated_streamflow_gauge.name = "simulated" - observed_streamflow_gauge = observed_streamflow[gauge] - observed_streamflow_gauge.name = "observed" + ratio_holdings_with_well_observed = ratio_holdings_with_well_observed[minimum_farmer_mask[0]].values + ratio_holdings_with_well_simulated = ratio_well_irrigated - # Combine the simulated and observed streamflow data - streamflows = pd.concat( - [simulated_streamflow_gauge, observed_streamflow_gauge], - join="inner", - axis=1, - ) + minimum_well_mask = np.where(ratio_holdings_with_well_observed > 0.01) + + irrigation_well_score = 1 - abs(((ratio_holdings_with_well_simulated - ratio_holdings_with_well_observed) / ratio_holdings_with_well_observed)) - # Add a small value to the simulated streamflow to avoid division by zero - streamflows["simulated"] += 0.0001 - return streamflows - - streamflows = [get_streamflows(gauge, observed_streamflow) for gauge in gauges] - streamflows = [streamflow for streamflow in streamflows if not streamflow.empty] - if config["calibration"]["monthly"] is True: - # Calculate the monthly mean of the streamflow data - streamflows = [streamflows.resample("M").mean() for streamflows in streamflows] - - KGEs = [] - for streamflow in streamflows: - # print(f"Processing: {streamflow}") - KGEs.append( - KGE_calculation(s=streamflow["simulated"], o=streamflow["observed"]) - ) + total_farmers = farmers_per_region.sum() + farmers_fraction = farmers_per_region[minimum_farmer_mask] / total_farmers - assert KGEs # Check if KGEs is not empty - kge = np.mean(KGEs) + irrigation_well_score = float(np.sum(irrigation_well_score[minimum_well_mask] * farmers_fraction[minimum_well_mask])) + print("run_id: " + str(individual.label)+", IWS: "+"{0:.3f}".format(irrigation_well_score)) + with open(os.path.join(config['calibration']['path'],"IWS_log.csv"), "a") as myfile: + myfile.write(str(individual.label)+"," + str(irrigation_well_score)+"\n") - print("run_id: " + str(individual.label) + ", KGE: " + "{0:.3f}".format(kge)) - with open( - os.path.join(config["calibration"]["path"], "KGE_log.csv"), "a" - ) as myfile: - myfile.write(str(individual.label) + "," + str(kge) + "\n") + return irrigation_well_score - return kge +def get_KGE_discharge(run_directory, individual, config, gauges, observed_streamflow): + def get_streamflows(gauge, observed_streamflow): + # Get the path of the simulated streamflow file + Qsim_tss = os.path.join(run_directory, config['calibration']['scenario'], f"{gauge[0]} {gauge[1]}.csv") + # os.path.join(run_directory, 'base/discharge.csv') + + # Check if the simulated streamflow file exists + if not os.path.isfile(Qsim_tss): + print("run_id: " + str(individual.label)+" File: "+ Qsim_tss) + raise Exception("No simulated streamflow found. Is the data exported in the ini-file (e.g., 'OUT_TSS_Daily = var.discharge'). Probably the model failed to start? Check the log files of the run!") + + # Read the simulated streamflow data from the file + simulated_streamflow = pd.read_csv(Qsim_tss, sep=",", parse_dates=True, index_col=0) + + # parse the dates in the index + # simulated_streamflow.index = pd.date_range(config['calibration']['start_time'] + timedelta(days=1), config['calibration']['end_time']) + + simulated_streamflow_gauge = simulated_streamflow[' '.join(map(str, gauge))] + simulated_streamflow_gauge.name = 'simulated' + observed_streamflow_gauge = observed_streamflow[gauge] + observed_streamflow_gauge.name = 'observed' + + # Combine the simulated and observed streamflow data + streamflows = pd.concat([simulated_streamflow_gauge, observed_streamflow_gauge], join='inner', axis=1) + + # Add a small value to the simulated streamflow to avoid division by zero + streamflows['simulated'] += 0.0001 + return streamflows + + streamflows = [get_streamflows(gauge, observed_streamflow) for gauge in gauges] + streamflows = [streamflow for streamflow in streamflows if not streamflow.empty] + if config['calibration']['monthly'] is True: + # Calculate the monthly mean of the streamflow data + streamflows = [streamflows.resample('M').mean() for streamflows in streamflows] + + KGEs = [] + for streamflow in streamflows: + # print(f"Processing: {streamflow}") + KGEs.append(KGE_calculation(s=streamflow['simulated'], o=streamflow['observed'])) + + assert KGEs # Check if KGEs is not empty + kge = np.mean(KGEs) + + print("run_id: " + str(individual.label)+", KGE: "+"{0:.3f}".format(kge)) + with open(os.path.join(config['calibration']['path'],"KGE_log.csv"), "a") as myfile: + myfile.write(str(individual.label)+"," + str(kge)+"\n") + + return kge def get_KGE_yield_ratio(run_directory, individual, config): - observed_yield_ratios = get_observed_yield_ratios(run_directory, config) - yield_ratios_simulated_path = os.path.join( - run_directory, SCENARIO, "yield_ratio.csv" - ) - # Check if the simulated streamflow file exists - if not os.path.isfile(yield_ratios_simulated_path): - print( - "run_id: " + str(individual.label) + " File: " + yield_ratios_simulated_path - ) - raise Exception( - "No simulated streamflow found. Is the data exported in the ini-file (e.g., 'OUT_TSS_Daily = var.discharge'). Probably the model failed to start? Check the log files of the run!" - ) - - # Read the simulated yield ratios from the file - simulated_yield_ratio = pd.read_csv( - yield_ratios_simulated_path, sep=",", parse_dates=True, index_col=0 - ) - simulated_yield_ratio = simulated_yield_ratio["yield_ratio"] - - # Name and resample to yearly data - simulated_yield_ratio.name = "simulated" - simulated_yield_ratio = simulated_yield_ratio.resample("Y").mean() - - # Take the first instead of last day of the year - simulated_yield_ratio.index = simulated_yield_ratio.index.to_period("Y").start_time - - yield_ratios_combined = pd.concat( - [simulated_yield_ratio, observed_yield_ratios], join="inner", axis=1 - ) - # Add a small value to the simulated streamflow to avoid division by zero - yield_ratios_combined["simulated"] += 0.0001 - - kge = KGE_calculation( - s=yield_ratios_combined["simulated"], o=yield_ratios_combined["observed"] - ) - - print( - "run_id: " - + str(individual.label) - + ", KGE yield ratio: " - + "{0:.3f}".format(kge) - ) - with open( - os.path.join(config["calibration"]["path"], "KGE_yield_ratio_log.csv"), "a" - ) as myfile: - myfile.write(str(individual.label) + "," + str(kge) + "\n") - - return kge - + observed_yield_ratios = get_observed_yield_ratios(run_directory, config) + yield_ratios_simulated_path = os.path.join(run_directory, config['calibration']['scenario'], "yield_ratio.csv") + # Check if the simulated streamflow file exists + if not os.path.isfile(yield_ratios_simulated_path): + print("run_id: " + str(individual.label)+" File: "+ yield_ratios_simulated_path) + raise Exception("No simulated streamflow found. Is the data exported in the ini-file (e.g., 'OUT_TSS_Daily = var.discharge'). Probably the model failed to start? Check the log files of the run!") + + # Read the simulated yield ratios from the file + simulated_yield_ratio= pd.read_csv(yield_ratios_simulated_path, sep=",", parse_dates=True, index_col=0) + simulated_yield_ratio = simulated_yield_ratio['yield_ratio'] + + # Name and resample to yearly data + simulated_yield_ratio.name = 'simulated' + simulated_yield_ratio = simulated_yield_ratio.resample('Y').mean() + + # Take the first instead of last day of the year + simulated_yield_ratio.index = simulated_yield_ratio.index.to_period('Y').start_time + + yield_ratios_combined = pd.concat([simulated_yield_ratio, observed_yield_ratios], join='inner', axis=1) + # Add a small value to the simulated streamflow to avoid division by zero + yield_ratios_combined['simulated'] += 0.0001 + + kge = KGE_calculation(s=yield_ratios_combined['simulated'], o=yield_ratios_combined['observed']) + + print("run_id: " + str(individual.label)+", KGE yield ratio: "+"{0:.3f}".format(kge)) + with open(os.path.join(config['calibration']['path'],"KGE_yield_ratio_log.csv"), "a") as myfile: + myfile.write(str(individual.label)+"," + str(kge)+"\n") + + return kge def get_observed_yield_ratios(run_directory, config): - regions = np.load( - os.path.join( - run_directory, - config["calibration"]["scenario"], - "region_id", - "20030101T000000.npz", - ) - )["data"] - simulated_subdistricts = gpd.read_file( - os.path.join(config["general"]["input_folder"], "areamaps", "regions.geojson") + regions = np.load(os.path.join(run_directory, config['calibration']['scenario'], 'region_id', '20030101T000000.npz'))['data'] + simulated_subdistricts = gpd.read_file(os.path.join(config['general']['input_folder'], 'areamaps', 'regions.geojson')) + unique_subdistricts = np.unique(simulated_subdistricts['district_c']) + + observed_yield_ratios = {} + for subdistrict in unique_subdistricts: + district_path = os.path.join(config['general']['original_data'], 'calibration', 'yield_ratio', f"{subdistrict}.csv") + yield_ratio_data = pd.read_csv(district_path, sep=";", parse_dates=True, index_col=0) + + observed_yield_ratios[subdistrict] = yield_ratio_data["yield_ratio"] + assert (observed_yield_ratios[subdistrict] >= 0).all() + + # Determine the proportion of farmers per district + district_c_series = simulated_subdistricts['district_c'].astype(int) + farmers_per_subregion = np.bincount(regions) + + # combine the + combined_dataframe = pd.DataFrame({ + "district": district_c_series, + "total_farmers": farmers_per_subregion + }) + + # Determine the fractions of farmers per district + farmers_per_district = combined_dataframe.groupby('district')['total_farmers'].sum() + total_farmers = farmers_per_district.sum() + farmers_fraction = farmers_per_district / total_farmers + + summed_series = pd.Series(dtype=float) + # Use the fractions to get the average yield ratios for this region + for subdistrict in unique_subdistricts: + yield_ratio_fraction = observed_yield_ratios[subdistrict] * farmers_fraction[int(subdistrict)] + summed_series = summed_series.add(yield_ratio_fraction, fill_value=0) + + summed_series.name = 'observed' + + return summed_series + +def get_observed_water_use(calibration_config): + # Read the data + fp = os.path.join(calibration_config['observed_data'], 'water_use', 'murray_water_use.csv') + data_df = pd.read_csv(fp) + data_df['Value'] = pd.to_numeric(data_df['Value'], errors='coerce') + + # Define irrigation types + irrigation_types = ['surface_irrigation', 'drip_or_trickle_irrigation', 'sprinkler_irrigation'] + + # Calculate total irrigation and fractions for each Year and Region + irrigation_df = data_df[data_df['Description'].isin(irrigation_types)] + irrigation_pivot = irrigation_df.pivot_table( + index=['Year', 'Region'], + columns='Description', + values='Value', + aggfunc='first' ) - unique_subdistricts = np.unique(simulated_subdistricts["district_c"]) - - observed_yield_ratios = {} - for subdistrict in unique_subdistricts: - district_path = os.path.join( - config["general"]["original_data"], - "calibration", - "yield_ratio", - f"{subdistrict}.csv", - ) - yield_ratio_data = pd.read_csv( - district_path, sep=";", parse_dates=True, index_col=0 + + # Calculate total irrigation + irrigation_pivot['total_irrigation'] = irrigation_pivot[irrigation_types].sum(axis=1) + + # Calculate fractions + for irr_type in irrigation_types: + irrigation_pivot['fraction_' + irr_type] = ( + irrigation_pivot[irr_type] / irrigation_pivot['total_irrigation'] ) - - observed_yield_ratios[subdistrict] = yield_ratio_data["yield_ratio"] - assert (observed_yield_ratios[subdistrict] >= 0).all() - - # Determine the proportion of farmers per district - district_c_series = simulated_subdistricts["district_c"].astype(int) - farmers_per_subregion = np.bincount(regions) - - # combine the - combined_dataframe = pd.DataFrame( - {"district": district_c_series, "total_farmers": farmers_per_subregion} + + # Melt fractions back into long format + fraction_cols = ['fraction_' + irr_type for irr_type in irrigation_types] + fraction_df = irrigation_pivot[fraction_cols].reset_index().melt( + id_vars=['Year', 'Region'], + value_vars=fraction_cols, + var_name='Description', + value_name='Value' ) - - # Determine the fractions of farmers per district - farmers_per_district = combined_dataframe.groupby("district")["total_farmers"].sum() - total_farmers = farmers_per_district.sum() - farmers_fraction = farmers_per_district / total_farmers - - summed_series = pd.Series(dtype=float) - # Use the fractions to get the average yield ratios for this region - for subdistrict in unique_subdistricts: - yield_ratio_fraction = ( - observed_yield_ratios[subdistrict] * farmers_fraction[int(subdistrict)] - ) - summed_series = summed_series.add(yield_ratio_fraction, fill_value=0) - - summed_series.name = "observed" - - return summed_series - + + # Combine fractions with original data + combined_data_df = pd.concat([data_df, fraction_df[['Year', 'Region', 'Description', 'Value']]], ignore_index=True) + + # Proceed with ratio-based estimation + pivot_df = combined_data_df.pivot_table( + index=['Year', 'Description'], + columns='Region', + values='Value', + aggfunc='first' + ) + # Do not reset the index here to keep 'Year' and 'Description' as MultiIndex + + # Define target and reference regions + reference_regions = { + 'murray': 'NSW', + 'goulburn_broken': 'VICT', + 'north_central': 'VICT', + 'north_east': 'VICT' + } + + for target_region, ref_region in reference_regions.items(): + if target_region in pivot_df.columns and ref_region in pivot_df.columns: + # Calculate the ratio + pivot_df['Ratio'] = pivot_df[target_region] / pivot_df[ref_region] + # Calculate the average ratio for each Description + ratio_by_description = pivot_df.groupby(level='Description')['Ratio'].mean() + # Map the average ratio back + pivot_df['Avg_Ratio'] = pivot_df.index.get_level_values('Description').map(ratio_by_description) + # Estimate missing target region values + pivot_df['Estimated'] = pivot_df[ref_region] * pivot_df['Avg_Ratio'] + # Fill in missing values + pivot_df[target_region] = pivot_df[target_region].fillna(pivot_df['Estimated']) + # Drop helper columns + pivot_df = pivot_df.drop(columns=['Ratio', 'Avg_Ratio', 'Estimated']) + + # Ensure fractions sum to 1 + fraction_descriptions = ['fraction_' + irr_type for irr_type in irrigation_types] + fractions_df = pivot_df.loc[pivot_df.index.get_level_values('Description').isin(fraction_descriptions)].copy() + # Replace NaN with 0 for fractions + fractions_df = fractions_df.fillna(0) + # Sum fractions for each Year and Region + fractions_sum = fractions_df.groupby(level=['Year']).sum() + # Normalize fractions + fractions_normalized = fractions_df.div(fractions_sum) + # Update pivot_df with normalized fractions + pivot_df.update(fractions_normalized) + + fp_new = os.path.join(calibration_config['observed_data'], 'water_use', 'murray_water_use_final.csv') + pivot_df.to_csv(fp_new) + + return pivot_df + @handle_ctrl_c def run_model(individual, config, gauges, observed_streamflow): - """ - This function takes an individual from the population and runs the model with the corresponding parameters. - It first checks if the run directory already exists and whether the model was run before. - If the directory exists and the model was run before, it skips running the model. - Otherwise, it runs the model and saves the results to the run directory. - """ + """ + This function takes an individual from the population and runs the model with the corresponding parameters. + It first checks if the run directory already exists and whether the model was run before. + If the directory exists and the model was run before, it skips running the model. + Otherwise, it runs the model and saves the results to the run directory. + """ + + os.makedirs(config['calibration']['path'], exist_ok=True) + runs_path = os.path.join(config['calibration']['path'], 'runs') + os.makedirs(runs_path, exist_ok=True) + logs_path = os.path.join(config['calibration']['path'], 'logs') + os.makedirs(logs_path, exist_ok=True) + + # Define the directory where the model run will be stored + run_directory = os.path.join(runs_path, individual.label) + + # Check if the run directory already exists + if os.path.isdir(run_directory): + # If the directory exists, check if the model was run before + if os.path.exists(os.path.join(run_directory, 'done.txt')): + # If the model was run before, set runmodel to False + runmodel = False + else: + # If the model was not run before, set runmodel to True and delete the directory + runmodel = True + shutil.rmtree(run_directory) + else: + # If the directory does not exist, set runmodel to True + runmodel = True + + if runmodel: + # Convert the individual's parameter ratios to the corresponding parameter values + individual_parameter_ratio = individual.tolist() + # Assert that all parameter ratios are between 0 and 1 + assert (np.array(individual_parameter_ratio) >= 0).all() and (np.array(individual_parameter_ratio) <= 1).all() + + # Create a dictionary of the individual's parameters + individual_parameters = {} + for i, parameter_data in enumerate(config['calibration']['parameters'].values()): + individual_parameters[parameter_data['variable']] = \ + parameter_data['min'] + individual_parameter_ratio[i] * (parameter_data['max'] - parameter_data['min']) + + # Create the configuration file for the model run + config_path = os.path.join(run_directory, 'config.yml') + while True: + os.mkdir(run_directory) + template = deepcopy(config) + + template['general']['export_inital_on_spinup'] = True + + template['general']['report_folder'] = run_directory + template['general']['initial_conditions_folder'] = os.path.join(run_directory, 'initial') + + + # Update the template configuration file with the individual's parameters + template['general']['spinup_time'] = config['calibration']['spinup_time'] + template['general']['start_time'] = config['calibration']['start_time'] + template['general']['end_time'] = config['calibration']['end_time'] + + template['report'] = {} + template['report_cwatm'] = {} + + template.update(config['calibration']['target_variables']) + + # loop through individual parameters and set them in the template + for parameter, value in individual_parameters.items(): + multi_set(template, value, *parameter.split('.')) + + # write the template to the specified config file + with open(config_path, 'w') as f: + yaml.dump(template, f) + + # acquire lock to check and set GPU usage + lock.acquire() + if current_gpu_use_count.value < n_gpu_spots: + use_gpu = int(current_gpu_use_count.value / config['calibration']['DEAP']['models_per_gpu']) + current_gpu_use_count.value += 1 + print(f'Using 1 GPU, current_counter: {current_gpu_use_count.value}/{n_gpu_spots}') + else: + use_gpu = False + print(f'Not using GPU, current_counter: {current_gpu_use_count.value}/{n_gpu_spots}') + lock.release() + + def run_model_scenario(run_command): + # Set the correct geb command run path + # geb_path = '/scistor/ivm/mka483/miniconda3/envs/geb_p2/bin/geb' + # command = [geb_path, run_command, "--config", config_path] + + command = [sys.executable, os.environ.get('GEB_PACKAGE_DIR') + '/geb/cli.py', run_command, '--config', config_path] + # build the command to run the script, including the use of a GPU if specified + + if use_gpu is not False: + command.extend(["--GPU", "--gpu_device", use_gpu]) + print(command, flush=True) + + # run the command and capture the output and errors + p = Popen(command, stdout=PIPE, stderr=PIPE) + output, errors = p.communicate() + + # check the return code of the command and handle accordingly + if p.returncode == 0: # model has run successfully + with open(os.path.join(logs_path, f"log{individual.label}_{run_command}.txt"), 'w') as f: + content = "OUTPUT:\n" + str(output.decode())+"\nERRORS:\n" + str(errors.decode()) + f.write(content) + modflow_folder = os.path.join(run_directory, 'spinup', 'modflow_model') + if os.path.exists(modflow_folder): + shutil.rmtree(modflow_folder) + + elif p.returncode == 1: # model has failed + with open(os.path.join(logs_path, f"log{individual.label}_{run_command}_{''.join((random.choice(string.ascii_lowercase) for x in range(10)))}.txt"), 'w') as f: + content = "OUTPUT:\n" + str(output.decode())+"\nERRORS:\n" + str(errors.decode()) + f.write(content) + shutil.rmtree(run_directory) + + else: + raise ValueError("Return code of run.py was not 0 or 1, but instead " + str(p.returncode) + ".") + + return p.returncode + + return_code = run_model_scenario("spinup") + if return_code == 0: + return_code = run_model_scenario("run") + if return_code == 0: + # release the GPU if it was used + if use_gpu is not False: + lock.acquire() + current_gpu_use_count.value -= 1 + lock.release() + print(f'Released 1 GPU, current_counter: {current_gpu_use_count.value}/{n_gpu_spots}') + with open(os.path.join(run_directory, 'done.txt'), 'w') as f: + f.write('done') + break + + # release the GPU if it was used + if use_gpu is not False: + lock.acquire() + current_gpu_use_count.value -= 1 + lock.release() + print(f'Released 1 GPU, current_counter: {current_gpu_use_count.value}/{n_gpu_spots}') + + scores = [] + for score in config['calibration']['calibration_targets']: + if score == 'KGE_discharge': + scores.append(get_KGE_discharge(run_directory, individual, config, gauges, observed_streamflow)) + if score == 'irrigation_wells': + scores.append(get_irrigation_wells_score(run_directory, individual, config)) + if score == 'KGE_yield_ratio': + scores.append(get_KGE_yield_ratio(run_directory, individual, config)) + return tuple(scores) - os.makedirs(config["calibration"]["path"], exist_ok=True) - runs_path = os.path.join(config["calibration"]["path"], "runs") - os.makedirs(runs_path, exist_ok=True) - logs_path = os.path.join(config["calibration"]["path"], "logs") - os.makedirs(logs_path, exist_ok=True) - - # Define the directory where the model run will be stored - run_directory = os.path.join(runs_path, individual.label) - - # Check if the run directory already exists - if os.path.isdir(run_directory): - # If the directory exists, check if the model was run before - if os.path.exists(os.path.join(run_directory, "done.txt")): - # If the model was run before, set runmodel to False - runmodel = False - else: - # If the model was not run before, set runmodel to True and delete the directory - runmodel = True - shutil.rmtree(run_directory) - else: - # If the directory does not exist, set runmodel to True - runmodel = True - - if runmodel: - # TODO: Template is not correctly loaded - template = None - - # set the map from which to get the yield ratio - SPEI relations as the map of the generation's first run - template["general"]["load_pre_spinup"] = config["calibration"][ - "load_pre_spinup" - ] - if config["calibration"]["load_pre_spinup"]: - generation = individual.label[:2] - initial_run_label = generation + "_000" - initial_relations_path = os.path.join( - run_directory, "..", initial_run_label, "initial_relations" - ) - template["general"]["initial_relations_folder"] = initial_relations_path - else: - template["general"]["initial_relations_folder"] = os.path.join( - run_directory, "initial" - ) - - template["general"]["export_inital_on_spinup"] = True - - template["general"]["report_folder"] = run_directory - template["general"]["initial_conditions_folder"] = os.path.join( - run_directory, "initial" - ) - - individual_parameter_ratio = ( - None # TODO: this parameter is not well loaded below and should be fixed - ) - - # Create a dictionary of the individual's parameters - individual_parameters = {} - for i, parameter_data in enumerate( - config["calibration"]["parameters"].values() - ): - individual_parameters[parameter_data["variable"]] = parameter_data[ - "min" - ] + individual_parameter_ratio[i] * ( - parameter_data["max"] - parameter_data["min"] - ) - - # Create the configuration file for the model run - config_path = os.path.join(run_directory, "config.yml") - while True: - os.mkdir(run_directory) - template = deepcopy(config) - - # set the map from which to get the yield ratio - SPEI relations as the map of the generation's first run - template["general"]["load_pre_spinup"] = config["calibration"][ - "load_pre_spinup" - ] - if config["calibration"]["load_pre_spinup"]: - generation = individual.label[:2] - initial_run_label = generation + "_000" - initial_conditions_path = os.path.join( - run_directory, "..", initial_run_label, "initial_conditions" - ) - template["general"]["initial_relations_folder"] = ( - initial_conditions_path - ) - else: - template["general"]["initial_relations_folder"] = os.path.join( - run_directory, "initial_conditions" - ) - - template["general"]["export_inital_on_spinup"] = True - - template["general"]["report_folder"] = run_directory - template["general"]["initial_conditions_folder"] = os.path.join( - run_directory, "initial_conditions" - ) - - # Update the template configuration file with the individual's parameters - template["general"]["spinup_time"] = config["calibration"]["spinup_time"] - template["general"]["start_time"] = config["calibration"]["start_time"] - template["general"]["end_time"] = config["calibration"]["end_time"] - - template["report"] = {} - template["report_hydrology"] = {} - - template.update(config["calibration"]["target_variables"]) - - # loop through individual parameters and set them in the template - for parameter, value in individual_parameters.items(): - multi_set(template, value, *parameter.split(".")) - - # write the template to the specified config file - with open(config_path, "w") as f: - yaml.dump(template, f) - - # acquire lock to check and set GPU usage - lock.acquire() - if current_gpu_use_count.value < n_gpu_spots: - use_gpu = int( - current_gpu_use_count.value - / config["calibration"]["DEAP"]["models_per_gpu"] - ) - current_gpu_use_count.value += 1 - print( - f"Using 1 GPU, current_counter: {current_gpu_use_count.value}/{n_gpu_spots}" - ) - else: - use_gpu = False - print( - f"Not using GPU, current_counter: {current_gpu_use_count.value}/{n_gpu_spots}" - ) - lock.release() - - def run_model_scenario(scenario): - # build the command to run the script, including the use of a GPU if specified - command = [ - "geb", - "run", - "--config", - config_path, - "--scenario", - scenario, - ] - if use_gpu is not False: - command.extend(["--GPU", "--gpu_device", use_gpu]) - print(command, flush=True) - - # run the command and capture the output and errors - p = Popen(command, stdout=PIPE, stderr=PIPE) - output, errors = p.communicate() - - # check the return code of the command and handle accordingly - if p.returncode == 0: # model has run successfully - with open( - os.path.join( - logs_path, f"log{individual.label}_{scenario}.txt" - ), - "w", - ) as f: - content = ( - "OUTPUT:\n" - + str(output.decode()) - + "\nERRORS:\n" - + str(errors.decode()) - ) - f.write(content) - modflow_folder = os.path.join( - run_directory, "spinup", "modflow_model" - ) - if os.path.exists(modflow_folder): - shutil.rmtree(modflow_folder) - - elif p.returncode == 1: # model has failed - with open( - os.path.join( - logs_path, - f"log{individual.label}_{scenario}_{''.join((random.choice(string.ascii_lowercase) for x in range(10)))}.txt", - ), - "w", - ) as f: - content = ( - "OUTPUT:\n" - + str(output.decode()) - + "\nERRORS:\n" - + str(errors.decode()) - ) - f.write(content) - shutil.rmtree(run_directory) - - else: - raise ValueError( - "Return code of run.py was not 0 or 1, but instead " - + str(p.returncode) - + "." - ) - - return p.returncode - - if ( - is_first_run(individual.label) - and config["calibration"]["load_pre_spinup"] - ): - run_model_scenario("pre_spinup") - - return_code = run_model_scenario("spinup") - if return_code == 0: - return_code = run_model_scenario(config["calibration"]["scenario"]) - if return_code == 0: - # release the GPU if it was used - if use_gpu is not False: - lock.acquire() - current_gpu_use_count.value -= 1 - lock.release() - print( - f"Released 1 GPU, current_counter: {current_gpu_use_count.value}/{n_gpu_spots}" - ) - with open(os.path.join(run_directory, "done.txt"), "w") as f: - f.write("done") - break - - # release the GPU if it was used - if use_gpu is not False: - lock.acquire() - current_gpu_use_count.value -= 1 - lock.release() - print( - f"Released 1 GPU, current_counter: {current_gpu_use_count.value}/{n_gpu_spots}" - ) - - scores = [] - for score in config["calibration"]["calibration_targets"]: - if score == "KGE_discharge": - scores.append( - get_KGE_discharge( - run_directory, individual, config, gauges, observed_streamflow - ) - ) - if score == "irrigation_wells": - scores.append(get_irrigation_wells_score(run_directory, individual, config)) - if score == "KGE_yield_ratio": - scores.append(get_KGE_yield_ratio(run_directory, individual, config)) - return tuple(scores) - - scores = [] - for score in config["calibration"]["calibration_targets"]: - if score == "KGE_discharge": - scores.append( - get_KGE_discharge( - run_directory, individual, config, gauges, observed_streamflow - ) - ) - if score == "irrigation_wells": - scores.append(get_irrigation_wells_score(run_directory, individual, config)) - # if score == 'KGE_yield_ratio': - # scores.append(get_KGE_yield_ratio(run_directory, individual, config)) - return tuple(scores) +def is_first_run(label): + # Check if the last three characters of the string are '000', meaning the first of a new generation + return label[-3:] == '000' +def export_front_history(config, ngen, effmax, effmin, effstd, effavg): + # Save history of the change in objective function scores during calibration to csv file + print(">> Saving optimization history (front_history.csv)") + front_history = {} + for i, calibration_value in enumerate(config['calibration']['calibration_targets']): + front_history.update({ + (calibration_value, 'effmax_R', ): effmax[:, i], + (calibration_value, 'effmin_R'): effmin[:, i], + (calibration_value, 'effstd_R'): effstd[:, i], + (calibration_value, 'effavg_R'): effavg[:, i], + }) + front_history = pd.DataFrame( + front_history, + index=list(range(ngen)) + ) + front_history.to_excel(os.path.join(config['calibration']['path'], "front_history.xlsx")) -def is_first_run(label): - # Check if the last three characters of the string are '000', meaning the first of a new generation - return label[-3:] == "000" +def init_pool(manager_current_gpu_use_count, manager_lock, gpus, models_per_gpu): + # set global variable for each process in the pool: + global ctrl_c_entered + global default_sigint_handler + ctrl_c_entered = False + default_sigint_handler = signal.signal(signal.SIGINT, pool_ctrl_c_handler) + + global lock + global current_gpu_use_count + global n_gpu_spots + n_gpu_spots = gpus * models_per_gpu + lock = manager_lock + current_gpu_use_count = manager_current_gpu_use_count +def calibrate(config, working_directory): + calibration_config = config['calibration'] -def export_front_history(config, ngen, effmax, effmin, effstd, effavg): - # Save history of the change in objective function scores during calibration to csv file - print(">> Saving optimization history (front_history.csv)") - front_history = {} - for i, calibration_value in enumerate(config["calibration"]["calibration_targets"]): - front_history.update( - { - ( - calibration_value, - "effmax_R", - ): effmax[:, i], - (calibration_value, "effmin_R"): effmin[:, i], - (calibration_value, "effstd_R"): effstd[:, i], - (calibration_value, "effavg_R"): effavg[:, i], - } - ) - front_history = pd.DataFrame(front_history, index=list(range(ngen))) - front_history.to_excel( - os.path.join(config["calibration"]["path"], "front_history.xlsx") - ) + use_multiprocessing = calibration_config['DEAP']['use_multiprocessing'] + select_best_n_individuals = calibration_config['DEAP']['select_best'] -def init_pool(manager_current_gpu_use_count, manager_lock, gpus, models_per_gpu): - # set global variable for each process in the pool: - global ctrl_c_entered - global default_sigint_handler - ctrl_c_entered = False - default_sigint_handler = signal.signal(signal.SIGINT, pool_ctrl_c_handler) + ngen = calibration_config['DEAP']['ngen'] + mu = calibration_config['DEAP']['mu'] + lambda_ = calibration_config['DEAP']['lambda_'] + config['calibration']['scenario'] = calibration_config['scenario'] - global lock - global current_gpu_use_count - global n_gpu_spots - n_gpu_spots = gpus * models_per_gpu - lock = manager_lock - current_gpu_use_count = manager_current_gpu_use_count + # Load irrigation water use data + get_observed_water_use(calibration_config) + # Load observed streamflow + gauges = [tuple(gauge) for gauge in config['general']['gauges']] + observed_streamflow = {} + for gauge in gauges: + streamflow_path = os.path.join(calibration_config['observed_data'], 'streamflow', f"{gauge[0]} {gauge[1]}.csv") + streamflow_data = pd.read_csv(streamflow_path, sep=",", parse_dates=True, index_col=0) + observed_streamflow[gauge] = streamflow_data["flow"] + # drop all rows with NaN values + observed_streamflow[gauge] = observed_streamflow[gauge].dropna() + observed_streamflow[gauge].name = 'observed' + assert (observed_streamflow[gauge] >= 0).all() -def calibrate(config, working_directory): - calibration_config = config["calibration"] - - use_multiprocessing = calibration_config["DEAP"]["use_multiprocessing"] - - select_best_n_individuals = calibration_config["DEAP"]["select_best"] - - ngen = calibration_config["DEAP"]["ngen"] - mu = calibration_config["DEAP"]["mu"] - lambda_ = calibration_config["DEAP"]["lambda_"] - config["calibration"]["scenario"] = calibration_config["scenario"] - - # Load observed streamflow - gauges = [tuple(gauge) for gauge in config["general"]["gauges"]] - observed_streamflow = {} - for gauge in gauges: - streamflow_path = os.path.join( - config["general"]["original_data"], - "calibration", - "streamflow", - f"{gauge[0]} {gauge[1]}.csv", - ) - streamflow_data = pd.read_csv( - streamflow_path, sep=",", parse_dates=True, index_col=0 - ) - observed_streamflow[gauge] = streamflow_data["flow"] - # drop all rows with NaN values - observed_streamflow[gauge] = observed_streamflow[gauge].dropna() - observed_streamflow[gauge].name = "observed" - assert (observed_streamflow[gauge] >= 0).all() - # with open(os.path.join(config['general']['input_folder'], 'agents', 'farmers' ,'attributes', 'irrigation_sources.json')) as f: - # irrigation_source_key = json.load(f) + # with open(os.path.join(config['general']['input_folder'], 'agents', 'farmers' ,'attributes', 'irrigation_sources.json')) as f: + # irrigation_source_key = json.load(f) # Create the fitness class using DEAP's creator class - creator.create( - "FitnessMulti", - base.Fitness, - weights=tuple(config["calibration"]["calibration_targets"].values()), - ) - + creator.create("FitnessMulti", base.Fitness, weights=tuple(config['calibration']['calibration_targets'].values())) + # Create the individual class using DEAP's creator class # The individual class is an array of typecode 'd' with the FitnessMulti class as its fitness attribute - creator.create( - "Individual", array.array, typecode="d", fitness=creator.FitnessMulti - ) + creator.create("Individual", array.array, typecode='d', fitness=creator.FitnessMulti) # Create a toolbox object to register the functions used in the genetic algorithm - toolbox = base.Toolbox() + toolbox = base.Toolbox() - # Register the attribute generator function + # Register the attribute generator function # This function generates a random float between 0 and 1 - toolbox.register("attr_float", random.uniform, 0, 1) - + toolbox.register("attr_float", random.uniform, 0, 1) + # Register the selection method # This function selects the best individuals from a population - toolbox.register("select", tools.selBest) - + toolbox.register("select", tools.selBest) + # Register the population initializer # This function creates a population of individuals, each with len(calibration_config['parameters']) number of attributes # Each attribute is generated using the attr_float function - toolbox.register( - "Individual", - tools.initRepeat, - creator.Individual, - toolbox.attr_float, - len(calibration_config["parameters"]), - ) - toolbox.register("population", tools.initRepeat, list, toolbox.Individual) - + toolbox.register("Individual", tools.initRepeat, creator.Individual, toolbox.attr_float, len(calibration_config['parameters'])) + toolbox.register("population", tools.initRepeat, list, toolbox.Individual) + # Define a function to check if an individual's attribute values are within the bounds - def checkBounds(min, max): - def decorator(func): - def wrappper(*args, **kargs): - offspring = func(*args, **kargs) + def checkBounds(min, max): + def decorator(func): + def wrappper(*args, **kargs): + offspring = func(*args, **kargs) # Iterate through the offspring, and for each attribute, check if it is within bounds # If it is out of bounds, set it to the maximum or minimum value respectively - for child in offspring: - for i in range(len(child)): - if child[i] > max: - child[i] = max - elif child[i] < min: - child[i] = min - return offspring - - return wrappper - - return decorator - - # Register the evaluation function - # This function runs the model and returns the fitness value of an individual - - partial_run_model_with_extra_arguments = partial( - run_model, config=config, gauges=gauges, observed_streamflow=observed_streamflow - ) - - toolbox.register("evaluate", partial_run_model_with_extra_arguments) - + for child in offspring: + for i in range(len(child)): + if child[i] > max: + child[i] = max + elif child[i] < min: + child[i] = min + return offspring + return wrappper + return decorator + + # Register the evaluation function + # This function runs the model and returns the fitness value of an individual + partial_run_model_with_extra_arguments = partial(run_model, config=config, gauges = gauges, observed_streamflow = observed_streamflow) + + toolbox.register("evaluate", partial_run_model_with_extra_arguments) + # Register the crossover function # This function mates two individuals using a blend crossover with an alpha value of 0.15 - toolbox.register("mate", tools.cxBlend, alpha=0.15) - + toolbox.register("mate", tools.cxBlend, alpha=0.15) + # Register the mutation function # This function mutates an individual using gaussian mutation with a mu of 0, sigma of 0.3, and indpb of 0.3 - toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.3, indpb=0.3) - + toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.3, indpb=0.3) + # Register the selection method # This function uses the NSGA-II algorithm to select individuals from a population - toolbox.register("select", tools.selNSGA2) + toolbox.register("select", tools.selNSGA2) # Create a history object to keep track of the population statistics - history = tools.History() + history = tools.History() # Decorate the crossover and mutation functions with the checkBounds function # This ensures that the attribute values of the offspring stay within the bounds - toolbox.decorate("mate", checkBounds(0, 1)) - toolbox.decorate("mutate", checkBounds(0, 1)) + toolbox.decorate("mate", checkBounds(0, 1)) + toolbox.decorate("mutate", checkBounds(0, 1)) # Create a manager for multiprocessing - manager = multiprocessing.Manager() + manager = multiprocessing.Manager() # Create a shared variable to keep track of the number of GPUs in use - current_gpu_use_count = manager.Value("i", 0) + current_gpu_use_count = manager.Value('i', 0) # Create a lock for managing access to the shared variable - manager_lock = manager.Lock() + manager_lock = manager.Lock() # Check if multiprocessing should be used - if use_multiprocessing is True: + if use_multiprocessing is True: # Get the number of CPU cores available for the pool - pool_size = int(os.getenv("SLURM_CPUS_PER_TASK") or 4) - print(f"Pool size: {pool_size}") + pool_size = int(os.getenv('SLURM_CPUS_PER_TASK') or 2) + print(f'Pool size: {pool_size}') # Ignore the interrupt signal - signal.signal(signal.SIGINT, signal.SIG_IGN) + signal.signal(signal.SIGINT, signal.SIG_IGN) # Create a multiprocessing pool with the specified number of processes # Initialize the pool with the shared variable and lock, and the number of GPUs available - pool = multiprocessing.Pool( - processes=pool_size, - initializer=init_pool, - initargs=( - current_gpu_use_count, - manager_lock, - calibration_config["gpus"], - ( - calibration_config["models_per_gpu"] - if "models_per_gpu" in calibration_config - else 1 - ), - ), - ) + pool = multiprocessing.Pool(processes=pool_size, initializer=init_pool, initargs=( + current_gpu_use_count, manager_lock, calibration_config['gpus'], calibration_config['models_per_gpu'] if 'models_per_gpu' in calibration_config else 1) + ) # Register the map function to use the multiprocessing pool - toolbox.register("map", pool.map) - else: + toolbox.register("map", pool.map) + else: # Initialize the pool without multiprocessing - init_pool( - current_gpu_use_count, - manager_lock, - calibration_config["gpus"], - ( - calibration_config["models_per_gpu"] - if "models_per_gpu" in calibration_config - else 1 - ), - ) + init_pool( + current_gpu_use_count, + manager_lock, calibration_config['gpus'], calibration_config['models_per_gpu'] if 'models_per_gpu' in calibration_config else 1) - # Set the probabilities of mating and mutation - cxpb = 0.7 # The probability of mating two individuals - mutpb = 0.3 # The probability of mutating an individual + # Set the probabilities of mating and mutation + cxpb = 0.7 # The probability of mating two individuals + mutpb = 0.3 # The probability of mutating an individual # Ensure that the probabilities add up to 1 - assert cxpb + mutpb == 1, "cxpb + mutpb must be equal to 1" + assert cxpb + mutpb == 1, "cxpb + mutpb must be equal to 1" # Create arrays to hold statistics about the population - effmax = np.full((ngen, len(config["calibration"]["calibration_targets"])), np.nan) - effmin = np.full((ngen, len(config["calibration"]["calibration_targets"])), np.nan) - effavg = np.full((ngen, len(config["calibration"]["calibration_targets"])), np.nan) - effstd = np.full((ngen, len(config["calibration"]["calibration_targets"])), np.nan) + effmax = np.full((ngen, len(config['calibration']['calibration_targets'])), np.nan) + effmin = np.full((ngen, len(config['calibration']['calibration_targets'])), np.nan) + effavg = np.full((ngen, len(config['calibration']['calibration_targets'])), np.nan) + effstd = np.full((ngen, len(config['calibration']['calibration_targets'])), np.nan) # Define the checkpoint file path - checkpoint = os.path.join(config["calibration"]["path"], "checkpoint.pkl") + checkpoint = os.path.join(config['calibration']['path'], "checkpoint.pkl") # Check if the checkpoint file exists - if os.path.exists(os.path.join(checkpoint)): + if os.path.exists(os.path.join(checkpoint)): # If the checkpoint file exists, load the population and other information from the checkpoint - with open(checkpoint, "rb") as cp_file: - cp = pickle.load(cp_file) - population = cp["population"] - start_gen = cp["generation"] - random.setstate(cp["rndstate"]) - if start_gen > 0: - offspring = cp["offspring"] - pareto_front = cp["pareto_front"] - else: - os.makedirs(config["calibration"]["path"], exist_ok=True) + with open(checkpoint, "rb" ) as cp_file: + cp = pickle.load(cp_file) + population = cp["population"] + start_gen = cp["generation"] + random.setstate(cp["rndstate"]) + if start_gen > 0: + offspring = cp["offspring"] + pareto_front = cp["pareto_front"] + else: + os.makedirs(config['calibration']['path'], exist_ok=True) # If the checkpoint file does not exist, start from the first generation - start_gen = 0 + start_gen = 0 # Create the initial population - population = toolbox.population(n=mu) + population = toolbox.population(n=mu) # Give each individual a label - for i, individual in enumerate(population): - individual.label = ( - str(start_gen % 1000).zfill(2) + "_" + str(i % 1000).zfill(3) - ) - pareto_front = tools.ParetoFront() - history.update(population) - - # Start the genetic algorithm loop - for generation in range(start_gen, ngen): - # If this is the first generation, save the initial population - if generation == 0: - cp = dict( - population=population, - generation=generation, - rndstate=random.getstate(), - pareto_front=pareto_front, - ) - else: - # Vary the population using crossover and mutation - offspring = algorithms.varOr(population, toolbox, lambda_, cxpb, mutpb) - for i, child in enumerate(offspring): - child.label = ( - str(generation % 1000).zfill(2) + "_" + str(i % 1000).zfill(3) - ) - # Save the population and offspring - cp = dict( - population=population, - generation=generation, - rndstate=random.getstate(), - offspring=offspring, - pareto_front=pareto_front, - ) - - # Save the checkpoint - with open(checkpoint, "wb") as cp_file: - pickle.dump(cp, cp_file) - - # Evaluate the individuals with an invalid fitness - if generation == 0: - individuals_to_evaluate = [ - ind for ind in population if not ind.fitness.valid - ] - else: - individuals_to_evaluate = [ - ind for ind in offspring if not ind.fitness.valid - ] - fitnesses = list(toolbox.map(toolbox.evaluate, individuals_to_evaluate)) - if any(map(lambda x: isinstance(x, KeyboardInterrupt), fitnesses)): - raise KeyboardInterrupt - - for ind, fit in zip(individuals_to_evaluate, fitnesses): - ind.fitness.values = fit - - # Update the hall of fame with the generated individuals - if generation == 0: - pareto_front.update(population) - population[:] = toolbox.select(population, lambda_) - else: - pareto_front.update(offspring) - population[:] = toolbox.select( - population + offspring, select_best_n_individuals - ) - - # Select the next generation population - history.update(population) - - # Loop through the different objective functions and calculate some statistics - # from the Pareto optimal population - for ii in range(len(config["calibration"]["calibration_targets"])): - effmax[generation, ii] = np.amax( - [pareto_front[x].fitness.values[ii] for x in range(len(pareto_front))] - ) - effmin[generation, ii] = np.amin( - [pareto_front[x].fitness.values[ii] for x in range(len(pareto_front))] - ) - effavg[generation, ii] = np.average( - [pareto_front[x].fitness.values[ii] for x in range(len(pareto_front))] - ) - effstd[generation, ii] = np.std( - [pareto_front[x].fitness.values[ii] for x in range(len(pareto_front))] - ) - # print(">> gen: " + str(generation) + ", effmax_KGE: "+"{0:.3f}".format(effmax[generation, 0])) - - # print(">> gen: " + str(generation) + ", effmax_irrigation_equipment: "+"{0:.3f}".format(effmax[generation, 1])) - - print( - ">> gen: " - + str(generation) - + ", effmax_KGE: " - + "{0:.3f}".format(effmax[generation, 0]) - ) - # print(">> gen: " + str(generation) + ", effmax_irrigation_equipment: "+"{0:.3f}".format(effmax[generation, 1])) - - # Closing the multiprocessing pool - if use_multiprocessing is True: - pool.close() + for i, individual in enumerate(population): + individual.label = str(start_gen % 1000).zfill(2) + '_' + str(i % 1000).zfill(3) + pareto_front = tools.ParetoFront() + history.update(population) - global ctrl_c_entered - global default_sigint_handler - ctrl_c_entered = False - default_sigint_handler = signal.signal(signal.SIGINT, pool_ctrl_c_handler) - - export_front_history(config, ngen, effmax, effmin, effstd, effavg) + # Start the genetic algorithm loop + for generation in range(start_gen, ngen): + # If this is the first generation, save the initial population + if generation == 0: + cp = dict(population=population, generation=generation, rndstate=random.getstate(), pareto_front=pareto_front) + else: + # Vary the population using crossover and mutation + offspring = algorithms.varOr(population, toolbox, lambda_, cxpb, mutpb) + for i, child in enumerate(offspring): + child.label = str(generation % 1000).zfill(2) + '_' + str(i % 1000).zfill(3) + # Save the population and offspring + cp = dict(population=population, generation=generation, rndstate=random.getstate(), offspring=offspring, pareto_front=pareto_front) + + # Save the checkpoint + with open(checkpoint, "wb") as cp_file: + pickle.dump(cp, cp_file) + + # Evaluate the individuals with an invalid fitness + if generation == 0: + individuals_to_evaluate = [ind for ind in population if not ind.fitness.valid] + else: + individuals_to_evaluate = [ind for ind in offspring if not ind.fitness.valid] + fitnesses = list(toolbox.map(toolbox.evaluate, individuals_to_evaluate)) + if any(map(lambda x: isinstance(x, KeyboardInterrupt), fitnesses)): + raise KeyboardInterrupt + + for ind, fit in zip(individuals_to_evaluate, fitnesses): + ind.fitness.values = fit + + # Update the hall of fame with the generated individuals + if generation == 0: + pareto_front.update(population) + population[:] = toolbox.select(population, lambda_) + else: + pareto_front.update(offspring) + population[:] = toolbox.select(population + offspring, select_best_n_individuals) + + # Select the next generation population + history.update(population) + + # Loop through the different objective functions and calculate some statistics + # from the Pareto optimal population + for ii in range(len(config['calibration']['calibration_targets'])): + effmax[generation, ii] = np.amax([pareto_front[x].fitness.values[ii] for x in range(len(pareto_front))]) + effmin[generation, ii] = np.amin([pareto_front[x].fitness.values[ii] for x in range(len(pareto_front))]) + effavg[generation, ii] = np.average([pareto_front[x].fitness.values[ii] for x in range(len(pareto_front))]) + effstd[generation, ii] = np.std([pareto_front[x].fitness.values[ii] for x in range(len(pareto_front))]) + # print(">> gen: " + str(generation) + ", effmax_KGE: "+"{0:.3f}".format(effmax[generation, 0])) + + + # print(">> gen: " + str(generation) + ", effmax_irrigation_equipment: "+"{0:.3f}".format(effmax[generation, 1])) + + # Closing the multiprocessing pool + if use_multiprocessing is True: + pool.close() + + global ctrl_c_entered + global default_sigint_handler + ctrl_c_entered = False + default_sigint_handler = signal.signal(signal.SIGINT, pool_ctrl_c_handler) + + export_front_history(config, ngen, effmax, effmin, effstd, effavg) diff --git a/geb/data_catalog.yml b/geb/data_catalog.yml index e9be2780..aa45988c 100644 --- a/geb/data_catalog.yml +++ b/geb/data_catalog.yml @@ -64,6 +64,44 @@ MIRCA2000_cropping_calendar_{irrigated_or_rainfed}: source_version: 1.1 placeholders: irrigated_or_rainfed: [irrigated, rainfed] +MIRCA2000_cropping_calendar_{irrigated_or_rainfed}_{year}: + data_type: Dataset + path: /scistor/ivm/data_catalogue/agriculture/MIRCA2000/condensed_cropping_calendars_2024/MIRCA-OS_{year}_{irrigated_or_rainfed}.csv + meta: + paper_doi: 10.4211/hs.60a890eb841c460192c03bb590687145 + paper_ref: Kebede et al. (2024) (preprint) + source_license: CC BY 4.0 + source_url: https://www.hydroshare.org/resource/60a890eb841c460192c03bb590687145/ + placeholders: + irrigated_or_rainfed: [irrigated, rainfed] + year: ['2000', '2005', '2010', '2015'] +MIRCA2000_cropping_area_{year}_{resolution}_{crop}_{ir_or_rf}: + data_type: RasterDataset + driver: raster + path: /scistor/ivm/data_catalogue/agriculture/MIRCA2000_Annual_Harvested_Area_Grids/{year}/{resolution}/MIRCA-OS_{crop}_{year}_{ir_or_rf}.tif + meta: + paper_doi: 10.4211/hs.60a890eb841c460192c03bb590687145 + paper_ref: Kebede et al. (2024) (preprint) + source_license: CC BY 4.0 + source_url: https://www.hydroshare.org/resource/60a890eb841c460192c03bb590687145/ + placeholders: + year: ['2000', '2005', '2010', '2015'] + resolution: ['5-arcminute', '30-arcminute'] + crop: ['Barley', 'Cassava', 'Cocoa', 'Coffee', 'Cotton', 'Fodder', 'Groundnuts', 'Maize', 'Millet', 'Oil_palm', 'Others_annual', 'Others_perennial', 'Potatoes', 'Pulses', 'Rapeseed', 'Rice', 'Rye', 'Sorghum', 'Soybeans', 'Sugar_beet', 'Sugar_cane', 'Sunflower', 'Wheat'] + ir_or_rf: [ir, rf] +global_irrigation_area_{irrigation_type}: + data_type: RasterDataset + driver: raster + path: /scistor/ivm/GEB/data_catalog/irrigation/gmia_v5_{irrigation_type}_pct_aei.asc + crs: 4326 + meta: + paper_doi: 10.5194/hess-14-1863-2010 + paper_ref: Siebert et al. (2010) + source_license: CC BY 4.0 + source_url: https://www.fao.org/aquastat/en/geospatial-information/global-maps-irrigated-areas/latest-version/ + source_version: 5 + placeholders: + irrigation_type: [aei, aai, aeisw, aeigw] GTSM: data_type: Dataset path: /scistor/ivm/data_catalogue/water_level/GTSM/reanalysis_waterlevel_10min_*_v1.nc @@ -393,7 +431,7 @@ GDL_regions_v4: source_license: https://globaldatalab.org/termsofuse/ GLOPOP-S: data_type: Dataset - path: /scistor/ivm/data_catalogue/population/GLOPOP-S/{region}.dat.gz + path: /scistor/ivm/data_catalogue/population/synthpop_{region}.dat.gz ERA5_geopotential: data_type: RasterDataset driver: netcdf @@ -442,7 +480,7 @@ CHELSA-BIOCLIM+_monthly_{variable}_{month}_{year}: category: climate source_license: CC0 1.0 source_url: https://www.doi.org/10.16904/envidat.332 - path: https://os.zhdk.cloud.switch.ch/envicloud/chelsa/chelsa_V2/GLOBAL/monthly/{variable}/CHELSA_{variable}_{month}_{year}_V.2.1.tif + path: https://os.zhdk.cloud.switch.ch/chelsav2/GLOBAL/monthly/{variable}/CHELSA_{variable}_{month}_{year}_V.2.1.tif global_wind_atlas: crs: 4326 data_type: RasterDataset @@ -484,6 +522,24 @@ lowder_farm_sizes: paper_ref: Lowder Sarah K. et al. 2016 source_license: CC BY-NC-ND 4.0 source_url: https://doi.org/10.1016/j.worlddev.2015.10.041 +preferences_individual: + path: socioeconomic/social/cleaned_individual_data.csv + driver: csv + data_type: DataFrame + meta: + category: time and risk preferences + source_url: https://gps.iza.org/dataset/dataset_6708dcdda1d7b.zip + paper_doi: doi.org/10.1093/qje/qjy013 + paper_ref: Falk et al. (2018) +preferences_country: + path: socioeconomic/social/cleaned_country_data.csv + driver: csv + data_type: DataFrame + meta: + category: time and risk preferences + source_url: https://gps.iza.org/dataset/dataset_6708dcdda1d7b.zip + paper_doi: doi.org/10.1093/qje/qjy013 + paper_ref: Falk et al. (2018) cwatm_domestic_water_demand_{scenario}_year: crs: 4326 data_type: RasterDataset diff --git a/geb/hydrology/lakes_reservoirs.py b/geb/hydrology/lakes_reservoirs.py index 399611af..4f82fd79 100644 --- a/geb/hydrology/lakes_reservoirs.py +++ b/geb/hydrology/lakes_reservoirs.py @@ -320,6 +320,15 @@ def get_outflows(self, waterBodyID): upstream_area_within_waterbodies, waterBodyID ) + # in some cases the cell with the highest number of upstream cells + # has mulitple occurences in the same lake, this seems to happen + # especially for very small lakes with a small drainage area. + # In such cases, we take the outflow cell with the lowest elevation. + outflow_elevation = load_grid( + self.model.files["grid"]["routing/kinematic/outflow_elevation"] + ) + outflow_elevation = self.var.compress(outflow_elevation) + waterbody_outflow_points = np.where( self.var.upstream_area_n_cells == upstream_area_within_waterbodies, waterBodyID, diff --git a/geb/reasonable_default_config.yml b/geb/reasonable_default_config.yml index 4258403c..e14e2e95 100644 --- a/geb/reasonable_default_config.yml +++ b/geb/reasonable_default_config.yml @@ -26,11 +26,15 @@ agent_settings: market: dynamic_market: false price_frequency: yearly + government: + irrigation_limit: + limit: 10000 # previous tried 1700 + per: capita farmers: ruleset: base base_management_yield_ratio: 1 farmers_going_out_of_business: false - cultivation_cost_fraction: 0.5 + cultivation_cost_fraction: 0.1 microcredit: interest_rate: 0.2 loan_duration: 2 @@ -44,16 +48,16 @@ agent_settings: ruleset: base adaptation_sprinkler: ruleset: base - interest_rate: 0.04 - lifespan: 10 - loan_duration: 11 - yield_multiplier: 1 - decision_horizon: 10 + lifespan: 20 + loan_duration: 21 + decision_horizon: 20 + adaptation_irrigation_expansion: + ruleset: no-adaptation adaptation_well: loan_duration: 21 lifespan: 20 decision_horizon: 20 - ruleset: base + ruleset: no-adaptation pump_hours: 3.5 specific_weight_water: 9800 # (kg/m3*m/s2) max_initial_sat_thickness: 50 # m @@ -73,8 +77,8 @@ agent_settings: risk_perception: base: 1.6 coef: -2.5 - max: 4.4118829846402114 - min: 0.1 + max: 10 + min: 0.5 fix_activation_order: true reservoir_operators: MinOutflowQ: 0.0 diff --git a/geb/setup/geb.py b/geb/setup/geb.py index 915c6cfc..235d680f 100644 --- a/geb/setup/geb.py +++ b/geb/setup/geb.py @@ -59,6 +59,7 @@ M49_to_ISO3, SUPERWELL_NAME_TO_ISO3, GLOBIOM_NAME_TO_ISO3, + COUNTRY_NAME_TO_ISO3, ) from .workflows.forcing import ( reproject_and_apply_lapse_rate_temperature, @@ -83,6 +84,43 @@ compressor = Blosc(cname="lz4", clevel=3, shuffle=Blosc.BITSHUFFLE) +def create_grid_cell_id_array(area_fraction_da): + # Get the sizes of the spatial dimensions + ny, nx = area_fraction_da.sizes["y"], area_fraction_da.sizes["x"] + + # Create an array of sequential integers from 0 to ny*nx - 1 + grid_ids = np.arange(ny * nx).reshape(ny, nx) + + # Create a DataArray with the same coordinates and dimensions as your spatial grid + grid_id_da = xr.DataArray( + grid_ids, + coords={ + "y": area_fraction_da.coords["y"], + "x": area_fraction_da.coords["x"], + }, + dims=["y", "x"], + ) + + return grid_id_da + + +def get_neighbor_cell_ids(cell_id, nx, ny, radius=1): + row = cell_id // nx + col = cell_id % nx + + neighbor_cell_ids = [] + for dr in range(-radius, radius + 1): + for dc in range(-radius, radius + 1): + if dr == 0 and dc == 0: + continue # Skip the cell itself + r = row + dr + c = col + dc + if 0 <= r < ny and 0 <= c < nx: + neighbor_id = r * nx + c + neighbor_cell_ids.append(neighbor_id) + return neighbor_cell_ids + + @contextmanager def suppress_logging_warning(logger): """ @@ -605,6 +643,10 @@ def process_crop_data( GLOBIOM_regions["ISO3"] = GLOBIOM_regions["Country"].map( GLOBIOM_NAME_TO_ISO3 ) + # For my personal branch + GLOBIOM_regions.loc[ + GLOBIOM_regions["Country"] == "Switzerland", "Region37" + ] = "EU_MidWest" assert not np.any(GLOBIOM_regions["ISO3"].isna()), "Missing ISO3 codes" ISO3_codes_region = self.geoms["areamaps/regions"]["ISO3"].unique() @@ -636,17 +678,6 @@ def process_crop_data( # Drop crops with no data at all for these regions donor_data = donor_data.dropna(axis=1, how="all") - total_years = donor_data.index.get_level_values("year").unique() - - if project_past_until_year: - assert ( - total_years[0] > project_past_until_year - ), f"Extrapolation targets must not fall inside available data time series. Current lower limit is {total_years[0]}" - if project_future_until_year: - assert ( - total_years[-1] < project_future_until_year - ), f"Extrapolation targets must not fall inside available data time series. Current upper limit is {total_years[-1]}" - # Filter out columns that contain the word 'meat' donor_data = donor_data[ [ @@ -709,6 +740,17 @@ def process_crop_data( prices_plus_crop_price_inflation, unique_regions ) + total_years = data.index.get_level_values("year").unique() + + if project_past_until_year: + assert ( + total_years[0] > project_past_until_year + ), f"Extrapolation targets must not fall inside available data time series. Current lower limit is {total_years[0]}" + if project_future_until_year: + assert ( + total_years[-1] < project_future_until_year + ), f"Extrapolation targets must not fall inside available data time series. Current upper limit is {total_years[-1]}" + if ( project_past_until_year is not None or project_future_until_year is not None @@ -905,7 +947,7 @@ def donate_and_receive_crop_prices( columns=[donor_data_country.name], ) if data_out is None: - data_out = new_data + data_out = new_data.copy() else: data_out = data_out.combine_first(new_data) else: @@ -921,13 +963,14 @@ def donate_and_receive_crop_prices( columns=[column], ) if data_out is None: - data_out = new_data + data_out = new_data.copy() else: data_out = data_out.combine_first(new_data) - # Drop columns that are all NaN - data_out = data_out.dropna(axis=1, how="all") data_out = data_out.drop(columns=["ISO3"]) + data_out = data_out.dropna(axis=1, how="all") + data_out = data_out.dropna(axis=0, how="all") + return data_out def convert_price_using_ppp( @@ -3998,6 +4041,140 @@ def setup_farmers_from_csv(self, path=None): farmers = pd.read_csv(path, index_col=0) self.setup_farmers(farmers) + def determine_crop_area_fractions(self, resolution="5-arcminute"): + output_folder = "plot/mirca_crops" + os.makedirs(output_folder, exist_ok=True) + + crops = [ + "Wheat", # 0 + "Maize", # 1 + "Rice", # 2 + "Barley", # 3 + "Rye", # 4 + "Millet", # 5 + "Sorghum", # 6 + "Soybeans", # 7 + "Sunflower", # 8 + "Potatoes", # 9 + "Cassava", # 10 + "Sugar_cane", # 11 + "Sugar_beet", # 12 + "Oil_palm", # 13 + "Rapeseed", # 14 + "Groundnuts", # 15 + "Others_perennial", # 23 + "Fodder", # 24 + "Others_annual", # 25, + ] + + years = ["2000", "2005", "2010", "2015"] + irrigation_types = ["ir", "rf"] + + # Initialize lists to collect DataArrays across years + fraction_da_list = [] + irrigated_fraction_da_list = [] + + # Initialize a dictionary to store datasets + crop_data = {} + + for year in years: + crop_data[year] = {} + for crop in crops: + crop_data[year][crop] = {} + for irrigation in irrigation_types: + dataset_name = f"MIRCA2000_cropping_area_{year}_{resolution}_{crop}_{irrigation}" + + crop_map = self.data_catalog.get_rasterdataset( + dataset_name, + bbox=self.bounds, + buffer=2, + ) + crop_map = crop_map.fillna(0) + + crop_data[year][crop][irrigation] = crop_map.assign_coords( + x=np.round(crop_map.coords["x"].values, decimals=6), + y=np.round(crop_map.coords["y"].values, decimals=6), + ) + + # Initialize variables for total calculations + total_cropped_area = None + total_crop_areas = {} + + # Calculate total crop areas and total cropped area + for crop in crops: + irrigated = crop_data[year][crop]["ir"] + rainfed = crop_data[year][crop]["rf"] + + total_crop = irrigated + rainfed + total_crop_areas[crop] = total_crop + + if total_cropped_area is None: + total_cropped_area = total_crop.copy() + else: + total_cropped_area += total_crop + + # Initialize lists to collect DataArrays for this year + fraction_list = [] + irrigated_fraction_list = [] + + # Calculate the fraction of each crop to the total cropped area + for crop in crops: + fraction = total_crop_areas[crop] / total_cropped_area + + # Assign 'crop' as a coordinate + fraction = fraction.assign_coords(crop=crop) + + # Append to the list + fraction_list.append(fraction) + + # Concatenate the list of fractions into a single DataArray along the 'crop' dimension + fraction_da = xr.concat(fraction_list, dim="crop") + + # Assign the 'year' coordinate and expand dimensions to include 'year' + fraction_da = fraction_da.assign_coords(year=year).expand_dims(dim="year") + + # Append to the list of all years + fraction_da_list.append(fraction_da) + + # Calculate irrigated fractions for each crop and collect them + for crop in crops: + irrigated = crop_data[year][crop]["ir"].compute() + total_crop = total_crop_areas[crop] + irrigated_fraction = irrigated / total_crop + + # Assign 'crop' as a coordinate + irrigated_fraction = irrigated_fraction.assign_coords(crop=crop) + + # Append to the list + irrigated_fraction_list.append(irrigated_fraction) + + # Concatenate the list of irrigated fractions into a single DataArray along the 'crop' dimension + irrigated_fraction_da = xr.concat(irrigated_fraction_list, dim="crop") + + # Assign the 'year' coordinate and expand dimensions to include 'year' + irrigated_fraction_da = irrigated_fraction_da.assign_coords( + year=year + ).expand_dims(dim="year") + + # Append to the list of all years + irrigated_fraction_da_list.append(irrigated_fraction_da) + + # After processing all years, concatenate along the 'year' dimension + all_years_fraction_da = xr.concat(fraction_da_list, dim="year") + all_years_irrigated_fraction_da = xr.concat( + irrigated_fraction_da_list, dim="year" + ) + + # Save the concatenated DataArrays as NetCDF files + save_dir = Path(self.root).parent / "preprocessing" / "crops" / "MIRCA2000" + save_dir.mkdir(parents=True, exist_ok=True) + + output_filename = save_dir / "crop_area_fraction_all_years.nc" + all_years_fraction_da.to_netcdf(output_filename) + + output_filename = save_dir / "crop_irrigated_fraction_all_years.nc" + all_years_irrigated_fraction_da.to_netcdf(output_filename) + def setup_create_farms_simple( self, region_id_column="region_id", @@ -4079,122 +4256,8 @@ def setup_create_farms_simple( "Country" ].ffill() - # convert country names to ISO3 codes - iso3_codes = { - "Albania": "ALB", - "Algeria": "DZA", - "American Samoa": "ASM", - "Argentina": "ARG", - "Austria": "AUT", - "Bahamas": "BHS", - "Barbados": "BRB", - "Belgium": "BEL", - "Brazil": "BRA", - "Bulgaria": "BGR", - "Burkina Faso": "BFA", - "Chile": "CHL", - "Colombia": "COL", - "Côte d'Ivoire": "CIV", - "Croatia": "HRV", - "Cyprus": "CYP", - "Czech Republic": "CZE", - "Democratic Republic of the Congo": "COD", - "Denmark": "DNK", - "Dominica": "DMA", - "Ecuador": "ECU", - "Egypt": "EGY", - "Estonia": "EST", - "Ethiopia": "ETH", - "Fiji": "FJI", - "Finland": "FIN", - "France": "FRA", - "French Polynesia": "PYF", - "Georgia": "GEO", - "Germany": "DEU", - "Greece": "GRC", - "Grenada": "GRD", - "Guam": "GUM", - "Guatemala": "GTM", - "Guinea": "GIN", - "Honduras": "HND", - "India": "IND", - "Indonesia": "IDN", - "Iran (Islamic Republic of)": "IRN", - "Ireland": "IRL", - "Italy": "ITA", - "Japan": "JPN", - "Jamaica": "JAM", - "Jordan": "JOR", - "Korea, Rep. of": "KOR", - "Kyrgyzstan": "KGZ", - "Lao People's Democratic Republic": "LAO", - "Latvia": "LVA", - "Lebanon": "LBN", - "Lithuania": "LTU", - "Luxembourg": "LUX", - "Malta": "MLT", - "Morocco": "MAR", - "Myanmar": "MMR", - "Namibia": "NAM", - "Nepal": "NPL", - "Netherlands": "NLD", - "Nicaragua": "NIC", - "Northern Mariana Islands": "MNP", - "Norway": "NOR", - "Pakistan": "PAK", - "Panama": "PAN", - "Paraguay": "PRY", - "Peru": "PER", - "Philippines": "PHL", - "Poland": "POL", - "Portugal": "PRT", - "Puerto Rico": "PRI", - "Qatar": "QAT", - "Romania": "ROU", - "Saint Lucia": "LCA", - "Saint Vincent and the Grenadines": "VCT", - "Samoa": "WSM", - "Senegal": "SEN", - "Serbia": "SRB", - "Sweden": "SWE", - "Switzerland": "CHE", - "Thailand": "THA", - "Trinidad and Tobago": "TTO", - "Turkey": "TUR", - "Uganda": "UGA", - "United Kingdom": "GBR", - "United States of America": "USA", - "Uruguay": "URY", - "Venezuela (Bolivarian Republic of)": "VEN", - "Virgin Islands, United States": "VIR", - "Yemen": "YEM", - "Cook Islands": "COK", - "French Guiana": "GUF", - "Guadeloupe": "GLP", - "Martinique": "MTQ", - "Réunion": "REU", - "Canada": "CAN", - "China": "CHN", - "Guinea Bissau": "GNB", - "Hungary": "HUN", - "Lesotho": "LSO", - "Libya": "LBY", - "Malawi": "MWI", - "Mozambique": "MOZ", - "New Zealand": "NZL", - "Slovakia": "SVK", - "Slovenia": "SVN", - "Spain": "ESP", - "St. Kitts & Nevis": "KNA", - "Viet Nam": "VNM", - "Australia": "AUS", - "Djibouti": "DJI", - "Mali": "MLI", - "Togo": "TGO", - "Zambia": "ZMB", - } farm_sizes_per_region["ISO3"] = farm_sizes_per_region["Country"].map( - iso3_codes + COUNTRY_NAME_TO_ISO3 ) assert ( not farm_sizes_per_region["ISO3"].isna().any() @@ -4256,14 +4319,15 @@ def setup_create_farms_simple( len(region_farm_sizes) == 2 ), f"Found {len(region_farm_sizes) / 2} region_farm_sizes for {country_ISO3}" + # Extract holdings and agricultural area data region_n_holdings = ( region_farm_sizes.loc[ region_farm_sizes["Holdings/ agricultural area"] == "Holdings" ] .iloc[0] .drop(["Holdings/ agricultural area", "ISO3"]) - .replace("..", "0") - .astype(np.int64) + .replace("..", np.nan) + .astype(float) ) agricultural_area_db_ha = ( region_farm_sizes.loc[ @@ -4272,11 +4336,84 @@ def setup_create_farms_simple( ] .iloc[0] .drop(["Holdings/ agricultural area", "ISO3"]) - .replace("..", "0") - .astype(np.int64) + .replace("..", np.nan) + .astype(float) + ) + + # Calculate average sizes for each bin + average_sizes = {} + for bin_name in agricultural_area_db_ha.index: + bin_name = bin_name.strip() + if bin_name.startswith("<"): + # For '< 1 Ha', average is 0.5 Ha + average_size = 0.5 + elif bin_name.startswith(">"): + # For '> 1000 Ha', assume average is 1500 Ha + average_size = 1500 + else: + # For ranges like '5 - 10 Ha', calculate the midpoint + try: + min_size, max_size = bin_name.replace("Ha", "").split("-") + min_size = float(min_size.strip()) + max_size = float(max_size.strip()) + average_size = (min_size + max_size) / 2 + except ValueError: + # Default average size if parsing fails + average_size = 1 + average_sizes[bin_name] = average_size + + # Convert average sizes to a pandas Series + average_sizes_series = pd.Series(average_sizes) + + # Handle cases where entries are zero or missing + agricultural_area_db_ha_zero_or_nan = ( + agricultural_area_db_ha.isnull() | (agricultural_area_db_ha == 0) ) - agricultural_area_db = agricultural_area_db_ha * 10000 + region_n_holdings_zero_or_nan = region_n_holdings.isnull() | ( + region_n_holdings == 0 + ) + + if agricultural_area_db_ha_zero_or_nan.all(): + # All entries in agricultural_area_db_ha are zero or NaN + if not region_n_holdings_zero_or_nan.all(): + # Calculate agricultural_area_db_ha using average sizes and region_n_holdings + region_n_holdings = region_n_holdings.fillna(1).replace(0, 1) + agricultural_area_db_ha = ( + average_sizes_series * region_n_holdings + ) + else: + raise ValueError( + "Cannot calculate agricultural_area_db_ha: both datasets are zero or missing." + ) + elif region_n_holdings_zero_or_nan.all(): + # All entries in region_n_holdings are zero or NaN + if not agricultural_area_db_ha_zero_or_nan.all(): + # Calculate region_n_holdings using agricultural_area_db_ha and average sizes + agricultural_area_db_ha = agricultural_area_db_ha.fillna( + 1 + ).replace(0, 1) + region_n_holdings = ( + agricultural_area_db_ha / average_sizes_series + ) + else: + raise ValueError( + "Cannot calculate region_n_holdings: both datasets are zero or missing." + ) + else: + # Replace zeros and NaNs in both datasets to avoid division by zero + region_n_holdings = region_n_holdings.fillna(1).replace(0, 1) + agricultural_area_db_ha = agricultural_area_db_ha.fillna(1).replace( + 0, 1 + ) + + # Calculate total agricultural area in square meters + agricultural_area_db = ( + agricultural_area_db_ha * 10000 + ) # Convert Ha to m^2 + + # Calculate region farm sizes region_farm_sizes = agricultural_area_db / region_n_holdings + else: region_farm_sizes = farm_sizes_per_region.loc[(state, district, tehsil)] region_n_holdings = n_farms_per_region.loc[(state, district, tehsil)] @@ -4329,7 +4466,7 @@ def setup_create_farms_simple( continue number_of_agents_size_class = round( - region_n_holdings[size_class].compute().item() + region_n_holdings[size_class].item() ) # if there is agricultural land, but there are no agents rounded down, we assume there is one agent if ( @@ -4411,15 +4548,511 @@ def setup_create_farms_simple( farmers = pd.concat(all_agents, ignore_index=True) self.setup_farmers(farmers) + def setup_household_characteristics(self, maximum_age=85): + import gzip + from honeybees.library.raster import pixels_to_coords + + n_farmers = self.binary["agents/farmers/id"].size + farms = self.subgrid["agents/farmers/farms"] + + # get farmer locations + vertical_index = ( + np.arange(farms.shape[0]) + .repeat(farms.shape[1]) + .reshape(farms.shape)[farms != -1] + ) + horizontal_index = np.tile(np.arange(farms.shape[1]), farms.shape[0]).reshape( + farms.shape + )[farms != -1] + farms_flattened = farms.values[farms.values != -1] + + pixels = np.zeros((n_farmers, 2), dtype=np.int32) + pixels[:, 0] = np.round( + np.bincount(farms_flattened, horizontal_index) + / np.bincount(farms_flattened) + ).astype(int) + pixels[:, 1] = np.round( + np.bincount(farms_flattened, vertical_index) / np.bincount(farms_flattened) + ).astype(int) + + locations = pixels_to_coords(pixels + 0.5, farms.raster.transform.to_gdal()) + locations = gpd.GeoDataFrame( + geometry=gpd.points_from_xy(locations[:, 0], locations[:, 1]), + crs="EPSG:4326", + ) # convert locations to geodataframe + + # GLOPOP-S uses the GDL regions. So we need to get the GDL region for each farmer using their location + GDL_regions = self.data_catalog.get_geodataframe( + "GDL_regions_v4", geom=self.region, variables=["GDLcode"] + ) + GDL_region_per_farmer = gpd.sjoin( + locations, GDL_regions, how="left", predicate="within" + ) + + # ensure that each farmer has a region + assert GDL_region_per_farmer["GDLcode"].notna().all() + + # Load GLOPOP-S data. This is a binary file and has no proper loading in hydromt. So we use the data catalog to get the path and format the path with the regions and load it with NumPy + GLOPOP_S = self.data_catalog.get_source("GLOPOP-S") + + GLOPOP_S_attribute_names = [ + "HID", + "RELATE_HEAD", + "INCOME", + "WEALTH", + "RURAL", + "AGE", + "GENDER", + "EDUC", + "HHTYPE", + "HHSIZE_CAT", + "AGRI_OWNERSHIP", + "FLOOR", + "WALL", + "ROOF", + "SOURCE", + ] + + # Get list of unique GDL codes from farmer dataframe + attributes_to_include = ["HHSIZE_CAT", "AGE", "EDUC", "WEALTH"] + + for column in attributes_to_include: + GDL_region_per_farmer[column] = np.full( + len(GDL_region_per_farmer), -1, dtype=np.int32 + ) + + for GDL_region, farmers_GDL_region in GDL_region_per_farmer.groupby("GDLcode"): + with gzip.open(GLOPOP_S.path.format(region=GDL_region), "rb") as f: + GLOPOP_S_region = np.frombuffer(f.read(), dtype=np.int32) + + n_people = GLOPOP_S_region.size // len(GLOPOP_S_attribute_names) + GLOPOP_S_region = pd.DataFrame( + np.reshape( + GLOPOP_S_region, (len(GLOPOP_S_attribute_names), n_people) + ).transpose(), + columns=GLOPOP_S_attribute_names, + ) + + # select farmers only + GLOPOP_S_region = GLOPOP_S_region[GLOPOP_S_region["RURAL"] == 1].drop( + "RURAL", axis=1 + ) + + # shuffle GLOPOP-S data to avoid biases in that regard + GLOPOP_S_household_IDs = GLOPOP_S_region["HID"].unique() + np.random.shuffle(GLOPOP_S_household_IDs) # shuffle array in-place + GLOPOP_S_region = ( + GLOPOP_S_region.set_index("HID") + .loc[GLOPOP_S_household_IDs] + .reset_index() + ) + + # Select a sample of farmers from the database. Because the households were + # shuflled there is no need to pick random households, we can just take the first n_farmers. + # If there are not enough farmers in the region, we need to upsample the data. In this case + # we will just take the same farmers multiple times starting from the top. + GLOPOP_S_household_IDs = GLOPOP_S_region["HID"].values + + # first we mask out all consecutive duplicates + mask = np.concatenate( + ([True], GLOPOP_S_household_IDs[1:] != GLOPOP_S_household_IDs[:-1]) + ) + GLOPOP_S_household_IDs = GLOPOP_S_household_IDs[mask] + + GLOPOP_S_region_sampled = [] + if GLOPOP_S_household_IDs.size < len(farmers_GDL_region): + n_repetitions = len(farmers_GDL_region) // GLOPOP_S_household_IDs.size + max_household_ID = GLOPOP_S_household_IDs.max() + for i in range(n_repetitions): + GLOPOP_S_region_copy = GLOPOP_S_region.copy() + # increase the household ID to avoid duplicate household IDs. Using (i + 1) so that the original household IDs are not changed + # so that they can be used in the final "topping up" below. + GLOPOP_S_region_copy["HID"] = GLOPOP_S_region_copy["HID"] + ( + (i + 1) * max_household_ID + ) + GLOPOP_S_region_sampled.append(GLOPOP_S_region_copy) + requested_farmers = ( + len(farmers_GDL_region) % GLOPOP_S_household_IDs.size + ) + else: + requested_farmers = len(farmers_GDL_region) + + GLOPOP_S_household_IDs = GLOPOP_S_household_IDs[:requested_farmers] + GLOPOP_S_region_sampled.append( + GLOPOP_S_region[GLOPOP_S_region["HID"].isin(GLOPOP_S_household_IDs)] + ) + + GLOPOP_S_region_sampled = pd.concat( + GLOPOP_S_region_sampled, ignore_index=True + ) + assert GLOPOP_S_region_sampled["HID"].unique().size == len( + farmers_GDL_region + ) + + households_region = GLOPOP_S_region_sampled.groupby("HID") + # select only household heads + household_heads = households_region.apply( + lambda x: x[x["RELATE_HEAD"] == 1] + ) + assert len(household_heads) == len(farmers_GDL_region) + + # # age + # household_heads["AGE_continuous"] = np.full( + # len(household_heads), -1, dtype=np.int32 + # ) + age_class_to_age = { + 1: (0, 16), + 2: (16, 26), + 3: (26, 36), + 4: (36, 46), + 5: (46, 56), + 6: (56, 66), + 7: (66, maximum_age + 1), + } # exclusive + for age_class, age_range in age_class_to_age.items(): + household_heads_age_class = household_heads[ + household_heads["AGE"] == age_class + ] + household_heads.loc[household_heads_age_class.index, "AGE"] = ( + np.random.randint( + age_range[0], + age_range[1], + size=len(household_heads_age_class), + dtype=GDL_region_per_farmer["AGE"].dtype, + ) + ) + GDL_region_per_farmer.loc[farmers_GDL_region.index, "AGE"] = ( + household_heads["AGE"].values + ) + + # education level + GDL_region_per_farmer.loc[farmers_GDL_region.index, "EDUC"] = ( + household_heads["EDUC"].values + ) + + # household size + household_sizes_region = households_region.size().values.astype(np.int32) + GDL_region_per_farmer.loc[farmers_GDL_region.index, "HHSIZE_CAT"] = ( + household_sizes_region + ) + + # assert none of the household sizes are placeholder value -1 + assert (GDL_region_per_farmer["HHSIZE_CAT"] != -1).all() + assert (GDL_region_per_farmer["AGE"] != -1).all() + assert (GDL_region_per_farmer["EDUC"] != -1).all() + + self.set_binary( + GDL_region_per_farmer["HHSIZE_CAT"].values, + name="agents/farmers/household_size", + ) + self.set_binary( + GDL_region_per_farmer["AGE"].values, + name="agents/farmers/age_household_head", + ) + self.set_binary( + GDL_region_per_farmer["EDUC"].values, + name="agents/farmers/education_level", + ) + + def create_preferences(self): + # Risk aversion + preferences_country_level = self.data_catalog.get_dataframe( + "preferences_country", + variables=["country", "isocode", "patience", "risktaking"], + ).dropna() + + preferences_individual_level = self.data_catalog.get_dataframe( + "preferences_individual", + variables=["country", "isocode", "patience", "risktaking"], + ).dropna() + + def scale_to_range(x, new_min, new_max): + x_min = x.min() + x_max = x.max() + # Avoid division by zero + if x_max == x_min: + return pd.Series(new_min, index=x.index) + scaled_x = (x - x_min) / (x_max - x_min) * (new_max - new_min) + new_min + return scaled_x + + # Create 'risktaking_losses' + preferences_individual_level["risktaking_losses"] = scale_to_range( + preferences_individual_level["risktaking"] * -1, + new_min=-0.88, + new_max=-0.02, + ) + + # Create 'risktaking_gains' + preferences_individual_level["risktaking_gains"] = scale_to_range( + preferences_individual_level["risktaking"] * -1, + new_min=0.04, + new_max=0.94, + ) + + # Create 'discount' + preferences_individual_level["discount"] = scale_to_range( + preferences_individual_level["patience"] * -1, + new_min=0.15, + new_max=0.73, + ) + + # Create 'risktaking_losses' + preferences_country_level["risktaking_losses"] = scale_to_range( + preferences_country_level["risktaking"] * -1, + new_min=-0.88, + new_max=-0.02, + ) + + # Create 'risktaking_gains' + preferences_country_level["risktaking_gains"] = scale_to_range( + preferences_country_level["risktaking"] * -1, + new_min=0.04, + new_max=0.94, + ) + + # Create 'discount' + preferences_country_level["discount"] = scale_to_range( + preferences_country_level["patience"] * -1, + new_min=0.15, + new_max=0.73, + ) + + # List of variables for which to calculate the standard deviation + variables = ["discount", "risktaking_gains", "risktaking_losses"] + + # Convert the variables to numeric, coercing errors to NaN to handle non-numeric entries + for var in variables: + preferences_individual_level[var] = pd.to_numeric( + preferences_individual_level[var], errors="coerce" + ) + + # Group by 'isocode' and calculate the standard deviation for each variable + std_devs = preferences_individual_level.groupby("isocode")[variables].std() + + # Add a suffix '_std' to the column names to indicate standard deviation + std_devs = std_devs.add_suffix("_std").reset_index() + + # Merge the standard deviation data into 'preferences_country_level' on 'isocode' + preferences_country_level = preferences_country_level.merge( + std_devs, on="isocode", how="left" + ) + + preferences_country_level = preferences_country_level.drop( + ["patience", "risktaking"], axis=1 + ) + + return preferences_country_level + def setup_farmer_characteristics_simple( self, - irrigation_choice={ - "no": 1.0, - }, - risk_aversion_mean=0, - risk_aversion_standard_deviation=0.387, interest_rate=0.05, - discount_rate=0.1, + ): + n_farmers = self.binary["agents/farmers/id"].size + + preferences_global = self.create_preferences() + preferences_global.rename( + columns={ + "country": "Country", + "isocode": "ISO3", + "risktaking_losses": "Losses", + "risktaking_gains": "Gains", + "discount": "Discount", + "discount_std": "Discount_std", + "risktaking_losses_std": "Losses_std", + "risktaking_gains_std": "Gains_std", + }, + inplace=True, + ) + + GLOBIOM_regions = self.data_catalog.get_dataframe("GLOBIOM_regions_37") + GLOBIOM_regions["ISO3"] = GLOBIOM_regions["Country"].map(GLOBIOM_NAME_TO_ISO3) + # For my personal branch + GLOBIOM_regions.loc[GLOBIOM_regions["Country"] == "Switzerland", "Region37"] = ( + "EU_MidWest" + ) + assert not np.any(GLOBIOM_regions["ISO3"].isna()), "Missing ISO3 codes" + + ISO3_codes_region = self.geoms["areamaps/regions"]["ISO3"].unique() + GLOBIOM_regions_region = GLOBIOM_regions[ + GLOBIOM_regions["ISO3"].isin(ISO3_codes_region) + ]["Region37"].unique() + ISO3_codes_GLOBIOM_region = GLOBIOM_regions[ + GLOBIOM_regions["Region37"].isin(GLOBIOM_regions_region) + ]["ISO3"] + + donor_data = {} + for ISO3 in ISO3_codes_GLOBIOM_region: + region_risk_aversion_data = preferences_global[ + preferences_global["ISO3"] == ISO3 + ] + + region_risk_aversion_data = region_risk_aversion_data[ + [ + "Country", + "ISO3", + "Gains", + "Losses", + "Gains_std", + "Losses_std", + "Discount", + "Discount_std", + ] + ] + region_risk_aversion_data.reset_index(drop=True, inplace=True) + # Store pivoted data in dictionary with region_id as key + donor_data[ISO3] = region_risk_aversion_data + + # Concatenate all regional data into a single DataFrame with MultiIndex + donor_data = pd.concat(donor_data, names=["ISO3"]) + + # Drop crops with no data at all for these regions + donor_data = donor_data.dropna(axis=1, how="all") + + unique_regions = self.geoms["areamaps/regions"] + + data = self.donate_and_receive_crop_prices( + donor_data, unique_regions, GLOBIOM_regions + ) + + # Map to corresponding region + data_reset = data.reset_index(level="region_id") + data = data_reset.set_index("region_id") + region_ids = self.binary["agents/farmers/region_id"] + + # Set gains and losses + gains_array = pd.Series(region_ids).map(data["Gains"]).to_numpy() + gains_std = pd.Series(region_ids).map(data["Gains_std"]).to_numpy() + losses_array = pd.Series(region_ids).map(data["Losses"]).to_numpy() + losses_std = pd.Series(region_ids).map(data["Losses_std"]).to_numpy() + discount_array = pd.Series(region_ids).map(data["Discount"]).to_numpy() + discount_std = pd.Series(region_ids).map(data["Discount_std"]).to_numpy() + + try: + # income = self.binary["agents/farmers/income"] + pass + except KeyError: + self.logger.info("Income does not exist, generating random income..") + daily_non_farm_income_family = random.choices( + [50, 100, 200, 500], k=n_farmers + ) + self.set_binary( + daily_non_farm_income_family, + name="agents/farmers/income", + ) + + try: + household_size = self.binary["agents/farmers/household_size"] + except KeyError: + self.logger.info("Household size does not exist, generating random sizes..") + household_size = random.choices([1, 2, 3, 4, 5, 6, 7], k=n_farmers) + self.set_binary(household_size, name="agents/farmers/household_size") + + daily_consumption_per_capita = random.choices( + [50, 100, 200, 500], k=n_farmers + ) + self.set_binary( + daily_consumption_per_capita, + name="agents/farmers/daily_consumption_per_capita", + ) + + try: + education_levels = self.binary["agents/farmers/education_level"] + except KeyError: + self.logger.info( + "Education level does not exist, generating random levels.." + ) + education_levels = random.choices( + [1, 2, 3, 4, 5], k=n_farmers + ) # Random levels from 0-4 or your preferred scale + self.set_binary(education_levels, name="agents/farmers/education_level") + + try: + age = self.binary["agents/farmers/age_household_head"] + except KeyError: + self.logger.info( + "Age of household head does not exist, generating random ages.." + ) + age = random.choices( + range(18, 80), k=n_farmers + ) # Random ages between 18 and 80 + self.set_binary(age, name="agents/farmers/age_household_head") + + def normalize(array): + return (array - np.min(array)) / (np.max(array) - np.min(array)) + + combined_deviation_risk_aversion = ((2 / 6) * normalize(education_levels)) + ( + (4 / 6) * normalize(age) + ) + + # Generate random noise, positively correlated with education levels and age + # (Higher age / education level means more risk averse) + z = np.random.normal( + loc=0, scale=1, size=combined_deviation_risk_aversion.shape + ) + + # Initial gains_variation proportional to risk aversion + gains_variation = combined_deviation_risk_aversion * z + + current_std = np.std(gains_variation) + gains_variation = gains_variation * (gains_std / current_std) + + # Similarly for losses_variation + losses_variation = combined_deviation_risk_aversion * z + current_std_losses = np.std(losses_variation) + losses_variation = losses_variation * (losses_std / current_std_losses) + + # Add the generated noise to the original gains and losses arrays + gains_array_with_variation = np.clip(gains_array + gains_variation, -0.99, 0.99) + losses_array_with_variation = np.clip( + losses_array + losses_variation, -0.99, 0.99 + ) + + # Calculate intention factor based on age and education + # Intention factor scales negatively with age and positively with education level + intention_factor = normalize(education_levels) - normalize(age) + + # Adjust the intention factor to center it around a mean of 0.3 + # The total intention of age, education and neighbor effects can scale to 1 + intention_factor = intention_factor * 0.333 + 0.333 + + neutral_risk_aversion = np.mean( + [gains_array_with_variation, losses_array_with_variation], axis=0 + ) + + self.set_binary(neutral_risk_aversion, name="agents/farmers/risk_aversion") + self.set_binary( + gains_array_with_variation, name="agents/farmers/risk_aversion_gains" + ) + self.set_binary( + losses_array_with_variation, name="agents/farmers/risk_aversion_losses" + ) + self.set_binary(intention_factor, name="agents/farmers/intention_factor") + + # discount rate + discount_rate_variation_factor = ((2 / 6) * normalize(education_levels)) + ( + (4 / 6) * normalize(age) + ) + discount_rate_variation = discount_rate_variation_factor * z * -1 + + # Adjust the variations to have the desired overall standard deviation + current_std = np.std(discount_rate_variation) + discount_rate_variation = discount_rate_variation * (discount_std / current_std) + + # Apply the variations to the original discount rates and clip the values + discount_rate_with_variation = np.clip( + discount_array + discount_rate_variation, 0, 2 + ) + self.set_binary( + discount_rate_with_variation, + name="agents/farmers/discount_rate", + ) + + interest_rate = np.full(n_farmers, interest_rate, dtype=np.float32) + self.set_binary(interest_rate, name="agents/farmers/interest_rate") + + def setup_farmer_crop_calendar( + self, + year=2000, reduce_crops=False, replace_base=False, ): @@ -4428,23 +5061,28 @@ def setup_farmer_characteristics_simple( MIRCA_unit_grid = self.data_catalog.get_rasterdataset( "MIRCA2000_unit_grid", bbox=self.bounds, buffer=2 ) + crop_calendar = parse_MIRCA2000_crop_calendar( self.data_catalog, MIRCA_units=np.unique(MIRCA_unit_grid.values), ) + farmer_locations = get_farm_locations( + self.subgrid["agents/farmers/farms"], method="centroid" + ) + farmer_mirca_units = sample_from_map( MIRCA_unit_grid.values, - get_farm_locations(self.subgrid["agents/farmers/farms"], method="centroid"), + farmer_locations, MIRCA_unit_grid.raster.transform.to_gdal(), ) - # initialize the is_irrigated as -1 for all farmers - is_irrigated = np.full(n_farmers, -1, dtype=np.int32) + farmer_crops, is_irrigated = self.assign_crops_irrigation_farmers(year) + self.setup_farmer_irrigation_source(is_irrigated) crop_calendar_per_farmer = np.zeros((n_farmers, 3, 4), dtype=np.int32) for mirca_unit in np.unique(farmer_mirca_units): - n_farmers_mirca_unit = (farmer_mirca_units == mirca_unit).sum() + farmers_in_unit = np.where(farmer_mirca_units == mirca_unit)[0] area_per_crop_rotation = [] cropping_calenders_crop_rotation = [] @@ -4463,23 +5101,58 @@ def setup_farmer_characteristics_simple( cropping_calenders_crop_rotation ) - # select n crop rotations weighted by the area for each crop rotation - farmer_crop_rotations_idx = np.random.choice( - np.arange(len(area_per_crop_rotation)), - size=n_farmers_mirca_unit, - replace=True, - p=area_per_crop_rotation / area_per_crop_rotation.sum(), - ) - crop_calendar_per_farmer_mirca_unit = cropping_calenders_crop_rotation[ - farmer_crop_rotations_idx - ] - is_irrigated[farmer_mirca_units == mirca_unit] = ( - crop_calendar_per_farmer_mirca_unit[:, :, 1] == 1 - ).any(axis=1) + crops_in_unit = np.unique(farmer_crops[farmers_in_unit]) + for crop_id in crops_in_unit: + # Find rotations that include this crop + rotations_with_crop_idx = [] + for idx, rotation in enumerate(cropping_calenders_crop_rotation): + # Get crop IDs in the rotation, excluding -1 entries + crop_ids_in_rotation = rotation[:, 0] + crop_ids_in_rotation = crop_ids_in_rotation[ + crop_ids_in_rotation != -1 + ] + if crop_id in crop_ids_in_rotation: + rotations_with_crop_idx.append(idx) - crop_calendar_per_farmer[farmer_mirca_units == mirca_unit] = ( - crop_calendar_per_farmer_mirca_unit[:, :, [0, 2, 3, 4]] - ) + if not rotations_with_crop_idx: + print( + f"No rotations found for crop ID {crop_id} in mirca unit {mirca_unit}" + ) + continue + + # Get the area fractions and rotations for these indices + areas_with_crop = area_per_crop_rotation[rotations_with_crop_idx] + rotations_with_crop = cropping_calenders_crop_rotation[ + rotations_with_crop_idx + ] + + # Normalize the area fractions + total_area_for_crop = areas_with_crop.sum() + fractions = areas_with_crop / total_area_for_crop + + # Get farmers with this crop in the mirca_unit + farmers_with_crop_in_unit = farmers_in_unit[ + farmer_crops[farmers_in_unit] == crop_id + ] + + # Assign crop rotations to these farmers + assigned_rotation_indices = np.random.choice( + np.arange(len(rotations_with_crop)), + size=len(farmers_with_crop_in_unit), + replace=True, + p=fractions, + ) + + # Assign the crop calendars to the farmers + for farmer_idx, rotation_idx in zip( + farmers_with_crop_in_unit, assigned_rotation_indices + ): + assigned_rotation = rotations_with_crop[rotation_idx] + # Assign to farmer's crop calendar, taking columns [0, 2, 3, 4] + # Columns: [crop_id, planting_date, harvest_date, additional_attribute] + crop_calendar_per_farmer[farmer_idx] = assigned_rotation[ + :, [0, 2, 3, 4] + ] # Define constants for crop IDs WHEAT = 0 @@ -4499,9 +5172,9 @@ def setup_farmer_characteristics_simple( RAPESEED = 14 GROUNDNUTS = 15 # PULSES = 16 - CITRUS = 17 - DATE_PALM = 18 - GRAPES = 19 + # CITRUS = 17 + # # DATE_PALM = 18 + # # GRAPES = 19 # COTTON = 20 COCOA = 21 COFFEE = 22 @@ -4611,8 +5284,8 @@ def insert_other_variant_crop( ) # Change other annual / misc to one - most_common_check = [GROUNDNUTS, CITRUS, COCOA, COFFEE, OTHERS_ANNUAL] - replaced_value = [GROUNDNUTS, CITRUS, COCOA, COFFEE, OTHERS_ANNUAL] + most_common_check = [GROUNDNUTS, COCOA, COFFEE, OTHERS_ANNUAL] + replaced_value = [GROUNDNUTS, COCOA, COFFEE, OTHERS_ANNUAL] crop_calendar_per_farmer = replace_crop( crop_calendar_per_farmer, most_common_check, replaced_value ) @@ -4638,9 +5311,9 @@ def insert_other_variant_crop( crop_calendar_per_farmer, most_common_check, replaced_value ) - # Change perennial to one - most_common_check = [OIL_PALM, GRAPES, DATE_PALM, OTHERS_PERENNIAL] - replaced_value = [OIL_PALM, GRAPES, DATE_PALM, OTHERS_PERENNIAL] + # Change perennial to annual, otherwise counted double in esa dataset + most_common_check = [OIL_PALM, OTHERS_PERENNIAL] + replaced_value = [OTHERS_ANNUAL] crop_calendar_per_farmer = replace_crop( crop_calendar_per_farmer, most_common_check, replaced_value ) @@ -4688,56 +5361,287 @@ def insert_other_variant_crop( name="agents/farmers/crop_calendar_rotation_years", ) - if irrigation_choice == "random": - # randomly sample from irrigation sources - irrigation_source = np.random.choice( - list(self.dict["agents/farmers/irrigation_sources"].values()), - size=n_farmers, - ) - else: - assert isinstance(irrigation_choice, dict) - # convert irrigation sources to integers based on irrigation sources dictionary - # which was set previously - irrigation_choice_int = { - self.dict["agents/farmers/irrigation_sources"][i]: k - for i, k in irrigation_choice.items() - } - # pick irrigation source based on the probabilities - irrigation_source = np.random.choice( - list(irrigation_choice_int.keys()), - size=n_farmers, - p=np.array(list(irrigation_choice_int.values())) - / sum(irrigation_choice_int.values()), + def assign_crops_irrigation_farmers(self, year=2000): + # Define the directory and file paths + data_dir = Path(self.root).parent / "preprocessing" / "crops" / "MIRCA2000" + crop_area_file = data_dir / "crop_area_fraction_all_years.nc" + crop_irr_fraction_file = data_dir / "crop_irrigated_fraction_all_years.nc" + + # Load the DataArrays + all_years_fraction_da = xr.open_dataarray(crop_area_file) + all_years_irrigated_fraction_da = xr.open_dataarray(crop_irr_fraction_file) + + farmer_locations = get_farm_locations( + self.subgrid["agents/farmers/farms"], method="centroid" + ) + + crop_dict = { + "Wheat": 0, + "Maize": 1, + "Rice": 2, + "Barley": 3, + "Rye": 4, + "Millet": 5, + "Sorghum": 6, + "Soybeans": 7, + "Sunflower": 8, + "Potatoes": 9, + "Cassava": 10, + "Sugar_cane": 11, + "Sugar_beet": 12, + "Oil_palm": 13, + "Rapeseed": 14, + "Groundnuts": 15, + "Pulses": 16, + "Cotton": 20, + "Cocoa": 21, + "Coffee": 22, + "Others_perennial": 23, + "Fodder": 24, + "Others_annual": 25, + } + + area_fraction_2000 = all_years_fraction_da.sel(year=str(year)) + irrigated_fraction_2000 = all_years_irrigated_fraction_da.sel(year=str(year)) + # Fill nas as there is no diff between 0 or na in code and can cause issues + area_fraction_2000 = area_fraction_2000.fillna(0) + irrigated_fraction_2000 = irrigated_fraction_2000.fillna(0) + + crops_in_dataarray = area_fraction_2000.coords["crop"].values + + grid_id_da = create_grid_cell_id_array(all_years_fraction_da) + + ny, nx = area_fraction_2000.sizes["y"], area_fraction_2000.sizes["x"] + + n_cells = grid_id_da.max().item() + + farmer_cells = sample_from_map( + grid_id_da.values, + farmer_locations, + grid_id_da.raster.transform.to_gdal(), + ) + + crop_area_fractions = sample_from_map( + area_fraction_2000.values, + farmer_locations, + area_fraction_2000.raster.transform.to_gdal(), + ) + crop_irrigated_fractions = sample_from_map( + irrigated_fraction_2000.values, + farmer_locations, + irrigated_fraction_2000.raster.transform.to_gdal(), + ) + + n_farmers = self.binary["agents/farmers/id"].size + + # Prepare empty crop arrays + farmer_crops = np.full(n_farmers, -1, dtype=np.int32) + farmer_irrigated = np.full(n_farmers, 0, dtype=np.bool_) + + for i in range(n_cells): + farmers_cell_mask = farmer_cells == i + nr_farmers_cell = np.count_nonzero(farmers_cell_mask) + if nr_farmers_cell == 0: + continue + crop_area_fraction = crop_area_fractions[farmer_cells == i][0] + crop_irrigated_fraction = crop_irrigated_fractions[farmer_cells == i][0] + + if crop_area_fraction.sum() == 0: + # Expand the search radius until valid data is found + found_valid_neighbor = False + max_radius = max(nx, ny) # Maximum possible radius + radius = 1 + while not found_valid_neighbor and radius <= max_radius: + neighbor_ids = get_neighbor_cell_ids(i, nx, ny, radius) + for neighbor_id in neighbor_ids: + if neighbor_id not in farmer_cells: + continue + + neighbor_crop_area_fraction = crop_area_fractions[ + farmer_cells == neighbor_id + ][0] + if neighbor_crop_area_fraction.sum() != 0: + # Found valid neighbor + crop_area_fraction = neighbor_crop_area_fraction + crop_irrigated_fraction = crop_irrigated_fractions[ + farmer_cells == neighbor_id + ][0] + found_valid_neighbor = True + break + if not found_valid_neighbor: + radius += 1 # Increase the search radius + if not found_valid_neighbor: + # No valid data found even after expanding radius + print( + f"No valid data found for cell {i} after searching up to radius {radius - 1}." + ) + continue # Skip this cell + + farmer_indices_in_cell = np.where(farmers_cell_mask)[0] + + # ensure fractions sum to 1 + area_per_crop_rotation = crop_area_fraction / crop_area_fraction.sum() + + farmer_crop_rotations_idx = np.random.choice( + np.arange(len(area_per_crop_rotation)), + size=len(farmer_indices_in_cell), + replace=True, + p=area_per_crop_rotation, ) - self.set_binary(irrigation_source, name="agents/farmers/irrigation_source") - household_size = random.choices([1, 2, 3, 4, 5, 6, 7], k=n_farmers) - self.set_binary(household_size, name="agents/farmers/household_size") + # Map sampled indices to crop names using crops_in_dataarray + farmer_crop_names = crops_in_dataarray[farmer_crop_rotations_idx] + # Map crop names to integer codes using crop_dict + farmer_crop_codes = [ + crop_dict[crop_name] for crop_name in farmer_crop_names + ] + # assign to farmers + farmer_crops[farmer_indices_in_cell] = farmer_crop_codes - daily_non_farm_income_family = random.choices([50, 100, 200, 500], k=n_farmers) - self.set_binary( - daily_non_farm_income_family, - name="agents/farmers/daily_non_farm_income_family", + # Determine irrigating farmers + chosen_crops = np.unique(farmer_crop_rotations_idx) + + for c in chosen_crops: + # Indices of farmers in the cell assigned to crop c + farmers_with_crop_c_in_cell = np.where(farmer_crop_rotations_idx == c)[ + 0 + ] + N_c = len(farmers_with_crop_c_in_cell) + f_c = crop_irrigated_fraction[c] + if np.isnan(f_c) or f_c <= 0: + continue # No irrigation for this crop + N_irrigated = int(round(N_c * f_c)) + if N_irrigated > 0: + # Randomly select N_irrigated farmers from the N_c farmers + irrigated_indices_in_cell = np.random.choice( + farmers_with_crop_c_in_cell, size=N_irrigated, replace=False + ) + # Get the overall farmer indices + overall_farmer_indices = farmer_indices_in_cell[ + irrigated_indices_in_cell + ] + # Set irrigation status to True for these farmers + farmer_irrigated[overall_farmer_indices] = True + + assert not ( + farmer_crops == -1 + ).any(), "Error: some farmers have no crops assigned" + + return farmer_crops, farmer_irrigated + + def setup_farmer_irrigation_source(self, irrigating_farmers): + fraction_sw_irrigation = "aeisw" + fraction_sw_irrigation_data = self.data_catalog.get_rasterdataset( + f"global_irrigation_area_{fraction_sw_irrigation}", + bbox=self.bounds, + buffer=2, + ) + fraction_gw_irrigation = "aeigw" + fraction_gw_irrigation_data = self.data_catalog.get_rasterdataset( + f"global_irrigation_area_{fraction_gw_irrigation}", + bbox=self.bounds, + buffer=2, ) - daily_consumption_per_capita = random.choices([50, 100, 200, 500], k=n_farmers) - self.set_binary( - daily_consumption_per_capita, - name="agents/farmers/daily_consumption_per_capita", + farmer_locations = get_farm_locations( + self.subgrid["agents/farmers/farms"], method="centroid" ) - risk_aversion = np.random.normal( - loc=risk_aversion_mean, - scale=risk_aversion_standard_deviation, - size=n_farmers, + # Determine which farmers are irrigating + grid_id_da = create_grid_cell_id_array(fraction_sw_irrigation_data) + ny, nx = ( + fraction_sw_irrigation_data.sizes["y"], + fraction_sw_irrigation_data.sizes["x"], ) - self.set_binary(risk_aversion, name="agents/farmers/risk_aversion") - interest_rate = np.full(n_farmers, interest_rate, dtype=np.float32) - self.set_binary(interest_rate, name="agents/farmers/interest_rate") + n_cells = grid_id_da.max().item() + n_farmers = self.binary["agents/farmers/id"].size + + farmer_cells = sample_from_map( + grid_id_da.values, + farmer_locations, + grid_id_da.raster.transform.to_gdal(), + ) + fraction_sw_irrigation_farmers = sample_from_map( + fraction_sw_irrigation_data.values, + farmer_locations, + fraction_sw_irrigation_data.raster.transform.to_gdal(), + ) + fraction_gw_irrigation_farmers = sample_from_map( + fraction_gw_irrigation_data.values, + farmer_locations, + fraction_gw_irrigation_data.raster.transform.to_gdal(), + ) - discount_rate = np.full(n_farmers, discount_rate, dtype=np.float32) - self.set_binary(discount_rate, name="agents/farmers/discount_rate") + # Initialize the irrigation_source array + irrigation_source = np.full(n_farmers, -1, dtype=np.int32) + + for i in range(n_cells): + farmers_cell_mask = farmer_cells == i # Boolean mask for farmers in cell i + farmers_cell_indices = np.where(farmers_cell_mask)[0] # Absolute indices + + irrigating_farmers_mask = irrigating_farmers[farmers_cell_mask] + num_irrigating_farmers = np.sum(irrigating_farmers_mask) + + if num_irrigating_farmers > 0: + fraction_sw = fraction_sw_irrigation_farmers[farmers_cell_mask][0] + fraction_gw = fraction_gw_irrigation_farmers[farmers_cell_mask][0] + + # Normalize fractions + total_fraction = fraction_sw + fraction_gw + + # Handle edge cases if there are irrigating farmers but no data on sw/gw + if total_fraction == 0: + # Find neighboring cells with valid data + neighbor_ids = get_neighbor_cell_ids(i, nx, ny) + found_valid_neighbor = False + + for neighbor_id in neighbor_ids: + if neighbor_id not in np.unique(farmer_cells): + continue + + neighbor_mask = farmer_cells == neighbor_id + fraction_sw_neighbor = fraction_sw_irrigation_farmers[ + neighbor_mask + ][0] + fraction_gw_neighbor = fraction_gw_irrigation_farmers[ + neighbor_mask + ][0] + neighbor_total_fraction = ( + fraction_sw_neighbor + fraction_gw_neighbor + ) + + if neighbor_total_fraction > 0: + # Found valid neighbor + fraction_sw = fraction_sw_neighbor + fraction_gw = fraction_gw_neighbor + total_fraction = neighbor_total_fraction + + found_valid_neighbor = True + break + if not found_valid_neighbor: + # No valid neighboring cells found, handle accordingly + print(f"No valid data found for cell {i} and its neighbors.") + continue # Skip this cell + + # Normalize fractions + probabilities = np.array([fraction_sw, fraction_gw], dtype=np.float64) + probabilities_sum = probabilities.sum() + probabilities /= probabilities_sum + + # Indices of irrigating farmers in the region (absolute indices) + farmer_indices_in_region = farmers_cell_indices[irrigating_farmers_mask] + + # Assign irrigation sources using np.random.choice + irrigation_source[farmer_indices_in_region] = np.random.choice( + [0, 1], + size=len(farmer_indices_in_region), + p=probabilities, + ) + + # Update the irrigation_source attribute or return it as needed + self.set_binary(irrigation_source, name="agents/farmers/irrigation_source") + self.set_binary(irrigating_farmers, name="agents/farmers/irrigating_farmers") def setup_population(self): populaton_map = self.data_catalog.get_rasterdataset( @@ -5542,7 +6446,15 @@ def setup_discharge_observations(self, files): time_chunksize=1e99, # no chunking ) - def _write_grid(self, grid, var, files, is_updated): + def _write_grid( + self, + grid, + var, + files, + is_updated, + y_chunksize=XY_CHUNKSIZE, + x_chunksize=XY_CHUNKSIZE, + ): if is_updated[var]["updated"]: self.logger.info(f"Writing {var}") filename = var + ".zarr.zip" @@ -5557,7 +6469,56 @@ def _write_grid(self, grid, var, files, is_updated): # zarr cannot handle / in variable names grid.name = "data" assert hasattr(grid, "spatial_ref") - grid.to_zarr(filepath, mode="w") + + # Rechunk data variable if needed + if grid.chunks is None: + # Grid is not chunked, specify chunks and chunk the grid + chunksizes = { + "y": min(grid.y.size, y_chunksize), + "x": min(grid.x.size, x_chunksize), + } + chunks_tuple = tuple( + ( + chunksizes[dim] + if dim in chunksizes + else max(getattr(grid, dim).size, 1) + ) + for dim in grid.dims + ) + grid = grid.chunk(chunks_tuple) + data_chunks = chunks_tuple + else: + # Grid is already chunked; use existing chunks + data_chunks = tuple( + s[0] if len(set(s)) == 1 else s for s in grid.chunks + ) + + # Build the encoding dictionary for the data variable + encoding = { + grid.name: { + "compressor": compressor, + # Only specify chunks if the grid was not already chunked + **({"chunks": data_chunks} if grid.chunks is None else {}), + } + } + + # **Compute coordinate variables to avoid chunking issues** + for coord in grid.coords: + coord_da = grid.coords[coord] + if coord_da.ndim > 0 and coord_da.chunks is not None: + # Option 1: Rechunk the coordinate variable to match data variable + # grid.coords[coord] = coord_da.rechunk(data_chunks[-coord_da.ndim:]) + # Option 2: Compute the coordinate variable (since it's small) + grid.coords[coord] = coord_da.compute() + # Include coordinate in encoding without specifying chunks + encoding[coord] = {} + + # Now write to Zarr + grid.to_zarr( + filepath, + mode="w", + encoding=encoding, + ) # rasterio does not support boolean data, which is why # we convert it to uint8 before writing. These files are diff --git a/geb/setup/workflows/conversions.py b/geb/setup/workflows/conversions.py index 598b3bbb..c04bbc30 100644 --- a/geb/setup/workflows/conversions.py +++ b/geb/setup/workflows/conversions.py @@ -425,6 +425,255 @@ "South Sudan": "SSD", } +COUNTRY_NAME_TO_ISO3 = { + "Albania": "ALB", + "Algeria": "DZA", + "American Samoa": "ASM", + "Argentina": "ARG", + "Austria": "AUT", + "Bahamas": "BHS", + "Barbados": "BRB", + "Belgium": "BEL", + "Brazil": "BRA", + "Bulgaria": "BGR", + "Burkina Faso": "BFA", + "Chile": "CHL", + "Colombia": "COL", + "Côte d'Ivoire": "CIV", + "Croatia": "HRV", + "Cyprus": "CYP", + "Czech Republic": "CZE", + "Democratic Republic of the Congo": "COD", + "Denmark": "DNK", + "Dominica": "DMA", + "Ecuador": "ECU", + "Egypt": "EGY", + "Estonia": "EST", + "Ethiopia": "ETH", + "Fiji": "FJI", + "Finland": "FIN", + "France": "FRA", + "French Polynesia": "PYF", + "Georgia": "GEO", + "Germany": "DEU", + "Greece": "GRC", + "Grenada": "GRD", + "Guam": "GUM", + "Guatemala": "GTM", + "Guinea": "GIN", + "Honduras": "HND", + "India": "IND", + "Indonesia": "IDN", + "Iran (Islamic Republic of)": "IRN", + "Ireland": "IRL", + "Italy": "ITA", + "Japan": "JPN", + "Jamaica": "JAM", + "Jordan": "JOR", + "Korea, Rep. of": "KOR", + "Kyrgyzstan": "KGZ", + "Lao People's Democratic Republic": "LAO", + "Latvia": "LVA", + "Lebanon": "LBN", + "Lithuania": "LTU", + "Luxembourg": "LUX", + "Malta": "MLT", + "Morocco": "MAR", + "Myanmar": "MMR", + "Namibia": "NAM", + "Nepal": "NPL", + "Netherlands": "NLD", + "Nicaragua": "NIC", + "Northern Mariana Islands": "MNP", + "Norway": "NOR", + "Pakistan": "PAK", + "Panama": "PAN", + "Paraguay": "PRY", + "Peru": "PER", + "Philippines": "PHL", + "Poland": "POL", + "Portugal": "PRT", + "Puerto Rico": "PRI", + "Qatar": "QAT", + "Romania": "ROU", + "Saint Lucia": "LCA", + "Saint Vincent and the Grenadines": "VCT", + "Samoa": "WSM", + "Senegal": "SEN", + "Serbia": "SRB", + "Sweden": "SWE", + "Switzerland": "CHE", + "Thailand": "THA", + "Trinidad and Tobago": "TTO", + "Turkey": "TUR", + "Uganda": "UGA", + "United Kingdom": "GBR", + "United States of America": "USA", + "Uruguay": "URY", + "Venezuela (Bolivarian Republic of)": "VEN", + "Virgin Islands, United States": "VIR", + "Yemen": "YEM", + "Cook Islands": "COK", + "French Guiana": "GUF", + "Guadeloupe": "GLP", + "Martinique": "MTQ", + "Réunion": "REU", + "Canada": "CAN", + "China": "CHN", + "Guinea Bissau": "GNB", + "Hungary": "HUN", + "Lesotho": "LSO", + "Libya": "LBY", + "Malawi": "MWI", + "Mozambique": "MOZ", + "New Zealand": "NZL", + "Slovakia": "SVK", + "Slovenia": "SVN", + "Spain": "ESP", + "St. Kitts & Nevis": "KNA", + "Viet Nam": "VNM", + "Australia": "AUS", + "Djibouti": "DJI", + "Mali": "MLI", + "Togo": "TGO", + "Zambia": "ZMB", + "Afghanistan": "AFG", + "Andorra": "AND", + "Angola": "AGO", + "Anguilla": "AIA", + "Antarctica": "ATA", + "Antigua and Barbuda": "ATG", + "Armenia": "ARM", + "Aruba": "ABW", + "Azerbaijan": "AZE", + "Bahrain": "BHR", + "Bangladesh": "BGD", + "Belarus": "BLR", + "Belize": "BLZ", + "Benin": "BEN", + "Bermuda": "BMU", + "Bhutan": "BTN", + "Bolivia (Plurinational State of)": "BOL", + "Bonaire, Sint Eustatius and Saba": "BES", + "Bosnia and Herzegovina": "BIH", + "Botswana": "BWA", + "Bouvet Island": "BVT", + "British Indian Ocean Territory": "IOT", + "Brunei Darussalam": "BRN", + "Burundi": "BDI", + "Cabo Verde": "CPV", + "Cambodia": "KHM", + "Cameroon": "CMR", + "Central African Republic": "CAF", + "Chad": "TCD", + "Christmas Island": "CXR", + "Cocos (Keeling) Islands": "CCK", + "Comoros": "COM", + "Congo (the)": "COG", + "Costa Rica": "CRI", + "Cuba": "CUB", + "Curaçao": "CUW", + "Czechia": "CZE", + "Dominican Republic": "DOM", + "El Salvador": "SLV", + "Equatorial Guinea": "GNQ", + "Eritrea": "ERI", + "Eswatini": "SWZ", + "Falkland Islands": "FLK", + "Faroe Islands": "FRO", + "Gabon": "GAB", + "Gambia": "GMB", + "Ghana": "GHA", + "Gibraltar": "GIB", + "Greenland": "GRL", + "Guernsey": "GGY", + "Guyana": "GUY", + "Haiti": "HTI", + "Heard Island and McDonald Islands": "HMD", + "Holy See": "VAT", + "Hong Kong": "HKG", + "Iceland": "ISL", + "Iraq": "IRQ", + "Isle of Man": "IMN", + "Israel": "ISR", + "Jersey": "JEY", + "Kazakhstan": "KAZ", + "Kenya": "KEN", + "Kiribati": "KIR", + "Korea (the Democratic People's Republic of)": "PRK", + "Kuwait": "KWT", + "Liechtenstein": "LIE", + "Macao": "MAC", + "Madagascar": "MDG", + "Malaysia": "MYS", + "Maldives": "MDV", + "Marshall Islands": "MHL", + "Mauritania": "MRT", + "Mauritius": "MUS", + "Mayotte": "MYT", + "Mexico": "MEX", + "Micronesia (Federated States of)": "FSM", + "Moldova": "MDA", + "Monaco": "MCO", + "Mongolia": "MNG", + "Montenegro": "MNE", + "Montserrat": "MSR", + "Nauru": "NRU", + "Niger": "NER", + "Nigeria": "NGA", + "Niue": "NIU", + "Norfolk Island": "NFK", + "Oman": "OMN", + "Palau": "PLW", + "Palestine, State of": "PSE", + "Papua New Guinea": "PNG", + "Pitcairn": "PCN", + "Russian Federation": "RUS", + "Rwanda": "RWA", + "Saint Barthélemy": "BLM", + "Saint Helena, Ascension and Tristan da Cunha": "SHN", + "Saint Martin (French part)": "MAF", + "Saint Pierre and Miquelon": "SPM", + "San Marino": "SMR", + "Sao Tome and Principe": "STP", + "Saudi Arabia": "SAU", + "Seychelles": "SYC", + "Sierra Leone": "SLE", + "Singapore": "SGP", + "Sint Maarten (Dutch part)": "SXM", + "Solomon Islands": "SLB", + "Somalia": "SOM", + "South Africa": "ZAF", + "South Georgia and the South Sandwich Islands": "SGS", + "South Sudan": "SSD", + "Sri Lanka": "LKA", + "Sudan": "SDN", + "Suriname": "SUR", + "Svalbard and Jan Mayen": "SJM", + "Syrian Arab Republic": "SYR", + "Taiwan": "TWN", + "Tajikistan": "TJK", + "Tanzania": "TZA", + "Timor-Leste": "TLS", + "Tokelau": "TKL", + "Tonga": "TON", + "Tunisia": "TUN", + "Turkmenistan": "TKM", + "Turks and Caicos Islands": "TCA", + "Tuvalu": "TUV", + "Ukraine": "UKR", + "United Arab Emirates": "ARE", + "United States Minor Outlying Islands": "UMI", + "Uzbekistan": "UZB", + "Vanuatu": "VUT", + "Virgin Islands (British)": "VGB", + "Wallis and Futuna": "WLF", + "Western Sahara": "ESH", + "Zimbabwe": "ZWE", + "Åland Islands": "ALA", +} + + GLOBIOM_NAME_TO_ISO3 = { "Argentina": "ARG", "Australia": "AUS", diff --git a/geb/setup/workflows/crop_calendars.py b/geb/setup/workflows/crop_calendars.py index f331ae69..8bf3df96 100644 --- a/geb/setup/workflows/crop_calendars.py +++ b/geb/setup/workflows/crop_calendars.py @@ -173,6 +173,7 @@ def parse_MIRCA2000_crop_calendar(data_catalog, MIRCA_units): ).path MIRCA2000_data = {} + MIRCA2000_data = parse_MIRCA_file( MIRCA2000_data, rainfed_crop_calendar_fp, diff --git a/geb/setup/workflows/soilgrids.py b/geb/setup/workflows/soilgrids.py index 36e3cc4b..a8f9f03f 100644 --- a/geb/setup/workflows/soilgrids.py +++ b/geb/setup/workflows/soilgrids.py @@ -560,7 +560,7 @@ def load_soilgrids(data_catalog, grid, region): hydraulic_conductivity = interpolate_soil_layers(hydraulic_conductivity) assert hydraulic_conductivity.min() >= 1e-7 - assert hydraulic_conductivity.max() <= 10 + assert hydraulic_conductivity.max() <= 15 # same for pore_size_index lambda lambda_ = get_pore_size_index(ds)